[Optimize][Cherry-pick] Robust stability for PD deployment #5338 (#5395)

* [Optimize] Robust stability for PD deployment

---------

Co-authored-by: Kaipeng Deng <dengkaipeng@baidu.com>
This commit is contained in:
chenjian
2025-12-15 18:58:09 +08:00
committed by GitHub
parent f133ce501c
commit 4c76171b57
12 changed files with 161 additions and 41 deletions

View File

@@ -98,6 +98,12 @@ class InternalAdapter:
self.recv_control_cmd_server.response_for_control_cmd(task_id_str, result)
elif task["cmd"] == "connect_rdma":
self.engine.engine_worker_queue.put_connect_rdma_task(task)
elif task["cmd"] == "check_health":
is_health = self.engine.token_processor.healthy()
result = {"task_id": task_id_str, "result": is_health}
logger.debug(f"Response for task: {task_id_str}: is_health {is_health}")
with self.response_lock:
self.recv_control_cmd_server.response_for_control_cmd(task_id_str, result)
except Exception as e:
logger.error(f"handle_control_cmd got error: {e}, {traceback.format_exc()!s}")

View File

@@ -386,6 +386,13 @@ class SplitwiseConnector:
if msg_type == "decode" or msg_type == "prefill":
payload = [output.to_dict() for output in payload]
need_delete_keys = ["video_features", "image_features", "audio_features"]
for tmp_data in payload:
if "multimodal_inputs" not in tmp_data:
continue
for tmp_key in need_delete_keys:
if tmp_key in tmp_data["multimodal_inputs"]:
del tmp_data["multimodal_inputs"][tmp_key]
json_data = json.dumps({"type": msg_type, "payload": payload}).encode("utf-8")
return json_data