[PD Disaggregation] Unify the disaggregation info and the PD communication (#5438)

* Unify the disaggregation info and the PD communication

* up

* up

* fix

* fix conflict

* fix unit test
This commit is contained in:
Juncai
2025-12-09 14:44:59 +08:00
committed by GitHub
parent 8178e3fc6a
commit 83ea9646f9
10 changed files with 146 additions and 233 deletions

View File

@@ -424,7 +424,7 @@ class EngineService:
need_delete_tasks = []
for task in tasks:
if self.cfg.scheduler_config.splitwise_role != "mixed":
if self.cfg.scheduler_config.splitwise_role == "prefill":
status, msg = self.split_connector.check_decode_allocated(task)
if status:
task.metrics.ask_decode_resource_finish_time = time.time()
@@ -469,7 +469,7 @@ class EngineService:
is_prefill = False
for i in range(len(tasks)):
if tasks[i].disaggregate_info is not None:
if tasks[i].disaggregate_info["role"] == "decode":
if self.cfg.scheduler_config.splitwise_role == "decode":
is_decode = True
else:
is_prefill = True
@@ -811,11 +811,10 @@ class EngineService:
f"Engine has fetched tasks from {self.scheduler.__class__.__name__}: {[task.request_id for task in tasks]}"
)
if self.cfg.scheduler_config.splitwise_role != "mixed":
if self.cfg.scheduler_config.splitwise_role == "prefill":
for task in tasks:
# start async preprocess
self.resource_manager.apply_async_preprocess(task)
if self.cfg.scheduler_config.splitwise_role == "prefill":
for task in tasks:
# start async preprocess
self.resource_manager.apply_async_preprocess(task)
need_delete_tasks = []
if envs.FD_OFFLINE_PERF_TEST_FOR_PD:
for task in tasks:
@@ -873,7 +872,6 @@ class EngineService:
# release resource in P
self.resource_manager.pre_recycle_resource(tmp_task.request_id)
if self.cfg.scheduler_config.splitwise_role == "prefill":
# to send cache info to cache messager
if tasks:
need_check_req_ids = [task.request_id for task in tasks]
@@ -912,6 +910,7 @@ class EngineService:
tasks.remove(tmp_task)
# release resource in P
self.resource_manager.pre_recycle_resource(tmp_task.request_id)
# Fetch requests and add them to the scheduling queue
if tasks:
for task in tasks:
@@ -1765,11 +1764,10 @@ class EngineService:
role = self.cfg.scheduler_config.splitwise_role
host_ip = self.cfg.host_ip
disaggregate = self.cfg.disaggregate_info
request_queues_for_dp_ipc = None
result_queue_for_dp_ipc = None
if self.cfg.scheduler_config.name == "splitwise":
self.scheduler.start(role, host_ip, disaggregate)
self.scheduler.start(role, host_ip, self.cfg.register_info)
elif self.cfg.scheduler_config.name == "dp":
request_queues_for_dp_ipc = []
result_queue_for_dp_ipc = multiprocessing.Queue()