Source code for celery.worker.consumer.gossip

"""Worker <-> Worker communication Bootstep."""
from collections import defaultdict
from functools import partial
from heapq import heappush
from operator import itemgetter

from kombu import Consumer
from kombu.asynchronous.semaphore import DummyLock
from kombu.exceptions import ContentDisallowed, DecodeError

from celery import bootsteps
from celery.utils.log import get_logger
from celery.utils.objects import Bunch

from .mingle import Mingle

__all__ = ('Gossip',)

logger = get_logger(__name__)
debug, info = logger.debug, logger.info


[docs]class Gossip(bootsteps.ConsumerStep): """Bootstep consuming events from other workers. This keeps the logical clock value up to date. """ label = 'Gossip' requires = (Mingle,) _cons_stamp_fields = itemgetter( 'id', 'clock', 'hostname', 'pid', 'topic', 'action', 'cver', ) compatible_transports = {'amqp', 'redis'} def __init__(self, c, without_gossip=False, interval=5.0, heartbeat_interval=2.0, **kwargs): self.enabled = not without_gossip and self.compatible_transport(c.app) self.app = c.app c.gossip = self self.Receiver = c.app.events.Receiver self.hostname = c.hostname self.full_hostname = '.'.join([self.hostname, str(c.pid)]) self.on = Bunch( node_join=set(), node_leave=set(), node_lost=set(), ) self.timer = c.timer if self.enabled: self.state = c.app.events.State( on_node_join=self.on_node_join, on_node_leave=self.on_node_leave, max_tasks_in_memory=1, ) if c.hub: c._mutex = DummyLock() self.update_state = self.state.event self.interval = interval self.heartbeat_interval = heartbeat_interval self._tref = None self.consensus_requests = defaultdict(list) self.consensus_replies = {} self.event_handlers = { 'worker.elect': self.on_elect, 'worker.elect.ack': self.on_elect_ack, } self.clock = c.app.clock self.election_handlers = { 'task': self.call_task } super().__init__(c, **kwargs)
[docs] def compatible_transport(self, app): with app.connection_for_read() as conn: return conn.transport.driver_type in self.compatible_transports
[docs] def election(self, id, topic, action=None): self.consensus_replies[id] = [] self.dispatcher.send( 'worker-elect', id=id, topic=topic, action=action, cver=1, )
[docs] def call_task(self, task): try: self.app.signature(task).apply_async() except Exception as exc: # pylint: disable=broad-except logger.exception('Could not call task: %r', exc)
[docs] def on_elect(self, event): try: (id_, clock, hostname, pid, topic, action, _) = self._cons_stamp_fields(event) except KeyError as exc: return logger.exception('election request missing field %s', exc) heappush( self.consensus_requests[id_], (clock, f'{hostname}.{pid}', topic, action), ) self.dispatcher.send('worker-elect-ack', id=id_)
[docs] def start(self, c): super().start(c) self.dispatcher = c.event_dispatcher
[docs] def on_elect_ack(self, event): id = event['id'] try: replies = self.consensus_replies[id] except KeyError: return # not for us alive_workers = set(self.state.alive_workers()) replies.append(event['hostname']) if len(replies) >= len(alive_workers): _, leader, topic, action = self.clock.sort_heap( self.consensus_requests[id], ) if leader == self.full_hostname: info('I won the election %r', id) try: handler = self.election_handlers[topic] except KeyError: logger.exception('Unknown election topic %r', topic) else: handler(action) else: info('node %s elected for %r', leader, id) self.consensus_requests.pop(id, None) self.consensus_replies.pop(id, None)
[docs] def on_node_join(self, worker): debug('%s joined the party', worker.hostname) self._call_handlers(self.on.node_join, worker)
[docs] def on_node_leave(self, worker): debug('%s left', worker.hostname) self._call_handlers(self.on.node_leave, worker)
[docs] def on_node_lost(self, worker): info('missed heartbeat from %s', worker.hostname) self._call_handlers(self.on.node_lost, worker)
def _call_handlers(self, handlers, *args, **kwargs): for handler in handlers: try: handler(*args, **kwargs) except Exception as exc: # pylint: disable=broad-except logger.exception( 'Ignored error from handler %r: %r', handler, exc)
[docs] def register_timer(self): if self._tref is not None: self._tref.cancel() self._tref = self.timer.call_repeatedly(self.interval, self.periodic)
[docs] def periodic(self): workers = self.state.workers dirty = set() for worker in workers.values(): if not worker.alive: dirty.add(worker) self.on_node_lost(worker) for worker in dirty: workers.pop(worker.hostname, None)
[docs] def get_consumers(self, channel): self.register_timer() ev = self.Receiver(channel, routing_key='worker.#', queue_ttl=self.heartbeat_interval) return [Consumer( channel, queues=[ev.queue], on_message=partial(self.on_message, ev.event_from_message), no_ack=True )]
[docs] def on_message(self, prepare, message): _type = message.delivery_info['routing_key'] # For redis when `fanout_patterns=False` (See Issue #1882) if _type.split('.', 1)[0] == 'task': return try: handler = self.event_handlers[_type] except KeyError: pass else: return handler(message.payload) # proto2: hostname in header; proto1: in body hostname = (message.headers.get('hostname') or message.payload['hostname']) if hostname != self.hostname: try: _, event = prepare(message.payload) self.update_state(event) except (DecodeError, ContentDisallowed, TypeError) as exc: logger.error(exc) else: self.clock.forward()
Back to Top