We have an HA RabbitMQ cluster on which we are testing the HA functionality.
Our application is structured as follows:
async def handle_message(self, message: bytes) -> None:
    """Parse one consumed message and broadcast it to the "websocket" group.

    Args:
        message: Raw message body; it is decoded with ``json.loads``.
    """
    # Message processing happens here: decode the body and wrap it in the
    # event dict that is sent to the group.
    layer = get_channel_layer()
    event = {"type": "websocket.message", "message": json.loads(message)}
    await layer.group_send("websocket", event)
def _random_letters(self, n: int) -> str:
    """Return a string of ``n`` randomly chosen ASCII letters.

    Args:
        n: Number of letters to generate; ``0`` yields an empty string.

    Returns:
        A string drawn from ``string.ascii_letters`` (upper and lower case).
    """
    # random.choices draws all k letters in one call, replacing the manual
    # per-letter generator and its unused loop variable.
    return "".join(random.choices(string.ascii_letters, k=n))
async def process_messages(self):
    """Declare a private queue on the exchange and consume from it.

    Obtains the carehare connection from the channel layer, declares
    ``self.exchange`` as a fanout exchange, declares a randomly named
    durable quorum queue (its name is stored on ``self.queue_name``),
    binds the queue to the exchange, and hands every consumed message to
    ``handle_message``.

    NOTE(review): nothing in this method catches a closed connection, so
    any error raised while consuming propagates to the caller.
    """
    layer = get_channel_layer()
    connection = await layer.carehare_connection

    self.queue_name = f"changes_{self._random_letters(12)}"

    await connection.exchange_declare(
        exchange_name=self.exchange,
        exchange_type="fanout",
    )

    # NOTE(review): RabbitMQ presumably interprets "x-expires" in
    # milliseconds, which would make 5 a very short idle lifetime -
    # confirm the intended value.
    queue_arguments = {"x-queue-type": "quorum", "x-expires": 5}
    await connection.queue_declare(
        queue_name=self.queue_name,
        durable=True,
        arguments=queue_arguments,
    )

    await connection.queue_bind(
        exchange_name=self.exchange,
        queue_name=self.queue_name,
    )
    self.logger.info(f"Connected to queue {self.queue_name}: ")

    # Each message yielded by the consumer is processed before the next
    # one is requested.
    async with connection.acking_consumer(self.queue_name) as deliveries:
        async for body in deliveries:
            await self.handle_message(body)
def handle(self, *args, **options):
    """Run the consumer until ``process_messages`` returns or raises.

    Args:
        *args: Unused positional arguments.
        **options: Command options; ``exchange`` selects the exchange name.
            A missing or falsy value falls back to ``"test"``.
    """
    requested_exchange = options.get("exchange")
    if requested_exchange:
        self.exchange = requested_exchange
    else:
        self.exchange = "test"

    # asyncio.run drives the coroutine on a fresh event loop and blocks
    # until it finishes.
    asyncio.run(self.process_messages())
class UpdateTopologyConsumer(JsonWebsocketConsumer):
    """WebSocket consumer that relays "websocket" group events to its client."""

    def connect(self):
        """Join the "websocket" group, then accept the connection."""
        async_to_sync(self.channel_layer.group_add)("websocket", self.channel_name)
        self.accept()

    def websocket_message(self, message):
        """Print a received event and send it to the client as JSON.

        Args:
            message: The event to forward; presumably the dict sent to the
                group with type "websocket.message" - verify against sender.
        """
        print(message, flush=True)
        self.send_json(message)

    def disconnect(self, close_code):
        """Leave the "websocket" group and close the connection.

        Args:
            close_code: Close code supplied by the framework (unused).
        """
        # connect() adds this channel to the group, so remove it again here;
        # otherwise the membership outlives the socket.
        try:
            async_to_sync(self.channel_layer.group_discard)(
                "websocket", self.channel_name
            )
        finally:
            # Still close if the channel layer call fails.
            self.close()
Now we face the problem that if one RabbitMQ node in the cluster goes down, the application breaks:
Consumer Error
backend> python manage.py listenonupdates changes -v 3
2021-03-12 12:31:19,713 | INFO | Connect to RabbitMQ and subscribe to exchange: changes
2021-03-12 12:31:19,888 | INFO | Connected to queue changes_PHnjRResdlDD:
2021-03-12 12:33:08,772 | INFO | Message received:
2021-03-12 12:33:08,772 | INFO | b'{"bla": "bla bla"}'
Disconnected from RabbitMQ: RabbitMQ closed the connection: 320 CONNECTION_FORCED - Node was put into maintenance mode. Will reconnect.
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/channels_rabbitmq/core.py", line 263, in _reconnect_forever
await connection.closed
carehare._exceptions.ConnectionClosedByServer: RabbitMQ closed the connection: 320 CONNECTION_FORCED - Node was put into maintenance mode
Closing consumer
Traceback (most recent call last):
File "/usr/src/app/backend/updatetopology/management/commands/listenonupdates.py", line 68, in process_messages
async for message in consumer:
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 74, in __anext__
message, self._yielded_delivery_tag = await _next_delivery(
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 50, in _next_delivery
closed.result() # raise exception if there is one
File "/usr/local/lib/python3.9/site-packages/channels_rabbitmq/reader.py", line 37, in consume_into_multi_queue_until_connection_close
multi_queue.put_nowait(
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 223, in __aexit__
await self.closed
File "/usr/local/lib/python3.9/site-packages/channels_rabbitmq/reader.py", line 32, in consume_into_multi_queue_until_connection_close
body, delivery_tag = await consumer.next_delivery()
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 196, in next_delivery
return await _next_delivery(self._queue, self.closed)
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 50, in _next_delivery
closed.result() # raise exception if there is one
carehare._exceptions.ConnectionClosed
Traceback (most recent call last):
File "/usr/src/app/backend/manage.py", line 22, in <module>
main()
File "/usr/src/app/backend/manage.py", line 18, in main
execute_from_command_line(sys.argv)
File "/usr/local/lib/python3.9/site-packages/django/core/management/__init__.py", line 401, in execute_from_command_line
utility.execute()
File "/usr/local/lib/python3.9/site-packages/django/core/management/__init__.py", line 395, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/usr/local/lib/python3.9/site-packages/django/core/management/base.py", line 330, in run_from_argv
self.execute(*args, **cmd_options)
File "/usr/local/lib/python3.9/site-packages/django/core/management/base.py", line 371, in execute
output = self.handle(*args, **options)
File "/usr/src/app/backend/updatetopology/management/commands/listenonupdates.py", line 77, in handle
asyncio.run(self.process_messages())
File "/usr/local/lib/python3.9/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/usr/local/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
return future.result()
File "/usr/src/app/backend/updatetopology/management/commands/listenonupdates.py", line 69, in process_messages
await self.handle_message(message)
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 223, in __aexit__
await self.closed
File "/usr/src/app/backend/updatetopology/management/commands/listenonupdates.py", line 68, in process_messages
async for message in consumer:
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 74, in __anext__
message, self._yielded_delivery_tag = await _next_delivery(
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 50, in _next_delivery
closed.result() # raise exception if there is one
File "/usr/local/lib/python3.9/site-packages/channels_rabbitmq/reader.py", line 37, in consume_into_multi_queue_until_connection_close
multi_queue.put_nowait(
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 223, in __aexit__
await self.closed
File "/usr/local/lib/python3.9/site-packages/channels_rabbitmq/reader.py", line 32, in consume_into_multi_queue_until_connection_close
body, delivery_tag = await consumer.next_delivery()
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 196, in next_delivery
return await _next_delivery(self._queue, self.closed)
File "/usr/local/lib/python3.9/site-packages/carehare/_consume_channel.py", line 50, in _next_delivery
closed.result() # raise exception if there is one
carehare._exceptions.ConnectionClosed
Disconnected from RabbitMQ: RabbitMQ closed the connection: 320 CONNECTION_FORCED - Node was put into maintenance mode. Will reconnect.
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/channels_rabbitmq/core.py", line 263, in _reconnect_forever
await connection.closed
carehare._exceptions.ConnectionClosedByServer: RabbitMQ closed the connection: 320 CONNECTION_FORCED - Node was put into maintenance mode
We would like to reconnect automatically after a connection loss. How do you solve this problem?
We have an HA RabbitMQ cluster on which we are testing the HA functionality.
Our application is structured as follows:
Consumer Implementation
Now we face the problem that if one RabbitMQ node in the cluster goes down, the application breaks:
Consumer Error
In the Django Channels consumer, we most likely have the same problem:
Django Channels Consumer error
We would like to reconnect automatically after a connection loss. How do you solve this problem?