mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
* [Optimize] Robust stability for PD deployment --------- Co-authored-by: Kaipeng Deng <dengkaipeng@baidu.com>
This commit is contained in:
@@ -125,6 +125,7 @@ class EngineService:
|
||||
split_connector=self.split_connector,
|
||||
)
|
||||
self.token_processor.set_resource_manager(self.resource_manager)
|
||||
# self.token_processor.enable_monitor_hang()
|
||||
|
||||
self.partial_chunked_tokens = [0] * (self.cfg.max_num_partial_prefills + 1)
|
||||
for idx in range(1, self.cfg.max_num_partial_prefills + 1):
|
||||
@@ -716,7 +717,6 @@ class EngineService:
|
||||
is_fetching = False
|
||||
return
|
||||
|
||||
self.llm_logger.debug(f"get tasks from {type(self.scheduler)}: {tasks}")
|
||||
if self.cfg.scheduler_config.splitwise_role != "mixed":
|
||||
if self.cfg.scheduler_config.splitwise_role == "prefill":
|
||||
for task in tasks:
|
||||
|
||||
Reference in New Issue
Block a user