[BugFix] fix control signal release failed (#3390)

* [BugFix] fix control signal release failed

* [BugFix] fix control signal release failed

* update

* update

* update
This commit is contained in:
ltd0924
2025-08-19 13:51:38 +08:00
committed by GitHub
parent 8b12c80f90
commit bca8905b40
4 changed files with 58 additions and 48 deletions

View File

@@ -31,7 +31,7 @@ class ZmqClient:
"""
def __init__(self, name, mode):
self.context = zmq.Context()
self.context = zmq.Context(4)
self.socket = self.context.socket(mode)
self.file_name = f"/dev/shm/{name}.socket"
self.router_path = f"/dev/shm/router_{name}.ipc"
@@ -67,6 +67,7 @@ class ZmqClient:
"""
self.router = self.context.socket(zmq.ROUTER)
self.router.setsockopt(zmq.SNDHWM, self.ZMQ_SNDHWM)
self.router.setsockopt(zmq.ROUTER_MANDATORY, 1)
self.router.setsockopt(zmq.SNDTIMEO, -1)
self.router.bind(f"ipc://{self.router_path}")
@@ -125,6 +126,11 @@ class ZmqClient:
else:
break
if self.req_dict[req_id] == -1:
if data[-1].finished:
with self.mutex:
self.req_dict.pop(req_id, None)
return
try:
start_send = time.time()
if self.aggregate_send:
@@ -133,7 +139,9 @@ class ZmqClient:
result = msgpack.packb([response.to_dict() for response in data])
self.router.send_multipart([self.req_dict[req_id], b"", result])
llm_logger.debug(f"send_multipart result: {req_id} len {len(data)} elapse: {time.time()-start_send}")
except zmq.ZMQError as e:
llm_logger.error(f"[{req_id}] zmq error: {e}")
self.req_dict[req_id] = -1
except Exception as e:
llm_logger.error(f"Send result to zmq client failed: {e}")