[PD Disaggregation] Support different tensor-parallel sizes (tp_size) for prefill and decode (#5296)

* up

* up

* up

* fix
This commit is contained in:
Juncai
2025-12-01 17:50:20 +08:00
committed by GitHub
parent 54119cf07e
commit 0925d44f18
13 changed files with 584 additions and 36 deletions

View File

@@ -199,7 +199,7 @@ class SplitwiseConnector:
f"{task.disaggregate_info['cache_info']['rdma']['ip']}:"
+ f"{task.disaggregate_info['cache_info']['rdma']['port']}"
)
self.logger.info(f"send splitwise tasks to port {addr} decode")
self.logger.info(f"send splitwise tasks to port {addr} decode, {task.request_id}")
self.current_request_ids[task.request_id] = "init"
decode_diagg = task.disaggregate_info["cache_info"]
task.disaggregate_info["cache_info"] = self.cfg.disaggregate_info["cache_info"]
@@ -271,6 +271,7 @@ class SplitwiseConnector:
)
def check_decode_allocated(self, task):
self.logger.debug(f"start check decode allocated: {task.request_id}")
start_time = time.time()
if task.disaggregate_info is None:
return True, ""
@@ -280,7 +281,7 @@ class SplitwiseConnector:
return True, ""
while self.current_request_ids[task.request_id] == "init":
time.sleep(0.001)
if time.time() - start_time > 30:
if time.time() - start_time > envs.FD_PREFILL_WAIT_DECODE_RESOURCE_SECONDS:
del self.current_request_ids[task.request_id]
return False, "timeout"
msg = self.current_request_ids[task.request_id]
@@ -363,6 +364,7 @@ class SplitwiseConnector:
"rdma_ports": self.cfg.disaggregate_info["cache_info"]["rdma"]["rdma_port"],
"transfer_protocol": "rdma",
"dest_block_ids": dsg_info["block_tables"],
"decode_tp_size": self.cfg.parallel_config.tensor_parallel_size,
}
addr = f"{dsg_info['cache_info']['rdma']['ip']}:" + f"{dsg_info['cache_info']['rdma']['port']}"