[Optimize][Cherry-pick] Robust stability for PD deployment #5338 (#5395)

* [Optimize] Robust stability for PD deployment

---------

Co-authored-by: Kaipeng Deng <dengkaipeng@baidu.com>
This commit is contained in:
chenjian
2025-12-15 18:58:09 +08:00
committed by GitHub
parent f133ce501c
commit 4c76171b57
12 changed files with 161 additions and 41 deletions

View File

@@ -125,6 +125,7 @@ class EngineService:
split_connector=self.split_connector,
)
self.token_processor.set_resource_manager(self.resource_manager)
# self.token_processor.enable_monitor_hang()
self.partial_chunked_tokens = [0] * (self.cfg.max_num_partial_prefills + 1)
for idx in range(1, self.cfg.max_num_partial_prefills + 1):
@@ -716,7 +717,6 @@ class EngineService:
is_fetching = False
return
self.llm_logger.debug(f"get tasks from {type(self.scheduler)}: {tasks}")
if self.cfg.scheduler_config.splitwise_role != "mixed":
if self.cfg.scheduler_config.splitwise_role == "prefill":
for task in tasks: