[Cherry-Pick][BugFix] Fix the prefilled_step_idx signal of cache_messager when using cudagraph and PD (#4252)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled

* Fix the prefilled_step_idx signal of cache_messager when using cudagraph and PD

* support dp
This commit is contained in:
Zero Rains
2025-10-13 10:18:53 +08:00
committed by GitHub
parent 8d629568f2
commit 07db281647
2 changed files with 14 additions and 2 deletions

View File

@@ -211,7 +211,6 @@ class CacheMessager:
self.cache_info[info["request_id"]] = info
prefilled_layer_idx = layer_shm_value.value[0]
prefilled_step_idx = step_shm_value.value[0]
logger.info(f"prefilled_layer_idx: {prefilled_layer_idx}, prefilled_step_idx: {prefilled_step_idx}")
if prefilled_layer_idx == self.num_layers - 1:
time.sleep(0.001)
prefilled_layer_idx = layer_shm_value.value[0]

View File

@@ -428,6 +428,19 @@ class PaddleDisWorkerProc:
def graph_optimize_and_warm_up_model(self) -> None:
# Wrapper around the worker's graph optimization / warm-up step that also
# resets the prefilled-step IPC signal when this process runs with the
# "prefill" splitwise role. Returns None.
#
# Step 1: delegate the actual graph optimization and warm-up to the worker.
self.worker.graph_optimize_and_warm_up_model()
# reset cache_messager prefilled_step signal
# NOTE(review): presumably the warm-up / graph-capture runs above leave a
# stale step index in the signal, which the cache messager would otherwise
# read — confirm against the signal's writer.
if self.parallel_config.splitwise_role == "prefill":
# The device id is passed below as the signal's suffix.
# NOTE(review): presumably it must match the suffix used by whoever
# created the signal — verify.
gpu_id = self.worker.model_runner.device_id
# The signal name is keyed by this process's local rank.
prefilled_step_name = f"splitwise_complete_prefilled_step_{self.local_rank}"
# One-element int32 array handed to IPCSignal as `array`.
# NOTE(review): with create=False this is presumably only a shape/dtype
# template and its zeros are not written to the signal — confirm in
# IPCSignal.
prefilled_step_idx_data = np.zeros(shape=[1], dtype=np.int32)
# create=False: the signal is requested in non-creating mode here.
# NOTE(review): presumably it attaches to a signal that already exists
# and fails if it does not — confirm IPCSignal's behavior.
step_shm_value = IPCSignal(
name=prefilled_step_name,
array=prefilled_step_idx_data,
dtype=np.int32,
suffix=gpu_id,
create=False,
)
# Overwrite slot 0 of the signal with -1.
# NOTE(review): -1 presumably means "no step prefilled yet" to the reader
# of this signal — confirm against CacheMessager.
step_shm_value.value[0] = -1
def init_device(self) -> None:
"""Initialize device and Construct model runner"""
@@ -821,7 +834,7 @@ def run_worker_proc() -> None:
worker_proc.initialize_kv_cache()
# Trigger CUDAGraph capture
worker_proc.worker.graph_optimize_and_warm_up_model()
worker_proc.graph_optimize_and_warm_up_model()
# Initialize health status
worker_proc.init_health_status()