mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[PD Disaggregation] Unify the disaggregation info and the pd communication (#5438)
* Unify the disaggregation info and the pd communication
* up
* up
* fix
* fix conflict
* fix unittest
This commit is contained in:
@@ -424,7 +424,7 @@ class EngineService:
|
||||
|
||||
need_delete_tasks = []
|
||||
for task in tasks:
|
||||
if self.cfg.scheduler_config.splitwise_role != "mixed":
|
||||
if self.cfg.scheduler_config.splitwise_role == "prefill":
|
||||
status, msg = self.split_connector.check_decode_allocated(task)
|
||||
if status:
|
||||
task.metrics.ask_decode_resource_finish_time = time.time()
|
||||
@@ -469,7 +469,7 @@ class EngineService:
|
||||
is_prefill = False
|
||||
for i in range(len(tasks)):
|
||||
if tasks[i].disaggregate_info is not None:
|
||||
if tasks[i].disaggregate_info["role"] == "decode":
|
||||
if self.cfg.scheduler_config.splitwise_role == "decode":
|
||||
is_decode = True
|
||||
else:
|
||||
is_prefill = True
|
||||
@@ -811,11 +811,10 @@ class EngineService:
|
||||
f"Engine has fetched tasks from {self.scheduler.__class__.__name__}: {[task.request_id for task in tasks]}"
|
||||
)
|
||||
|
||||
if self.cfg.scheduler_config.splitwise_role != "mixed":
|
||||
if self.cfg.scheduler_config.splitwise_role == "prefill":
|
||||
for task in tasks:
|
||||
# start async preprocess
|
||||
self.resource_manager.apply_async_preprocess(task)
|
||||
if self.cfg.scheduler_config.splitwise_role == "prefill":
|
||||
for task in tasks:
|
||||
# start async preprocess
|
||||
self.resource_manager.apply_async_preprocess(task)
|
||||
need_delete_tasks = []
|
||||
if envs.FD_OFFLINE_PERF_TEST_FOR_PD:
|
||||
for task in tasks:
|
||||
@@ -873,7 +872,6 @@ class EngineService:
|
||||
# release resource in P
|
||||
self.resource_manager.pre_recycle_resource(tmp_task.request_id)
|
||||
|
||||
if self.cfg.scheduler_config.splitwise_role == "prefill":
|
||||
# to send cache info to cache messager
|
||||
if tasks:
|
||||
need_check_req_ids = [task.request_id for task in tasks]
|
||||
@@ -912,6 +910,7 @@ class EngineService:
|
||||
tasks.remove(tmp_task)
|
||||
# release resource in P
|
||||
self.resource_manager.pre_recycle_resource(tmp_task.request_id)
|
||||
|
||||
# Fetch requests and add them to the scheduling queue
|
||||
if tasks:
|
||||
for task in tasks:
|
||||
@@ -1765,11 +1764,10 @@ class EngineService:
|
||||
|
||||
role = self.cfg.scheduler_config.splitwise_role
|
||||
host_ip = self.cfg.host_ip
|
||||
disaggregate = self.cfg.disaggregate_info
|
||||
request_queues_for_dp_ipc = None
|
||||
result_queue_for_dp_ipc = None
|
||||
if self.cfg.scheduler_config.name == "splitwise":
|
||||
self.scheduler.start(role, host_ip, disaggregate)
|
||||
self.scheduler.start(role, host_ip, self.cfg.register_info)
|
||||
elif self.cfg.scheduler_config.name == "dp":
|
||||
request_queues_for_dp_ipc = []
|
||||
result_queue_for_dp_ipc = multiprocessing.Queue()
|
||||
|
||||
Reference in New Issue
Block a user