From b433a93d9a964c6b0ad53dfbfc8da86883b77e4f Mon Sep 17 00:00:00 2001 From: Zero Rains Date: Wed, 24 Sep 2025 19:46:52 +0800 Subject: [PATCH] fix the bug for prefilled_step_idx signal of cache_messager in cudagraph and PD (#4235) --- fastdeploy/cache_manager/cache_messager.py | 1 - fastdeploy/worker/worker_process.py | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fastdeploy/cache_manager/cache_messager.py b/fastdeploy/cache_manager/cache_messager.py index 4502860d0..3d01c0a8b 100644 --- a/fastdeploy/cache_manager/cache_messager.py +++ b/fastdeploy/cache_manager/cache_messager.py @@ -267,7 +267,6 @@ class CacheMessager: self.cache_info[info["request_id"]] = info prefilled_layer_idx = layer_shm_value.value[0] prefilled_step_idx = step_shm_value.value[0] - logger.info(f"prefilled_layer_idx: {prefilled_layer_idx}, prefilled_step_idx: {prefilled_step_idx}") if prefilled_layer_idx == self.num_layers - 1: time.sleep(0.001) prefilled_layer_idx = layer_shm_value.value[0] diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py index a830ef7ed..051a55f58 100644 --- a/fastdeploy/worker/worker_process.py +++ b/fastdeploy/worker/worker_process.py @@ -442,6 +442,23 @@ class PaddleDisWorkerProc: def graph_optimize_and_warm_up_model(self) -> None: self.worker.graph_optimize_and_warm_up_model() + # reset cache_messager prefilled_step signal + if self.scheduler_config.splitwise_role == "prefill": + dp_rank_id = ( + self.local_rank + + self.parallel_config.local_data_parallel_id * self.parallel_config.tensor_parallel_size + ) + gpu_id = self.worker.model_runner.device_id + prefilled_step_name = f"splitwise_complete_prefilled_step_{dp_rank_id}" + prefilled_step_idx_data = np.zeros(shape=[1], dtype=np.int32) + step_shm_value = IPCSignal( + name=prefilled_step_name, + array=prefilled_step_idx_data, + dtype=np.int32, + suffix=gpu_id, + create=False, + ) + step_shm_value.value[0] = -1 def init_device(self) -> None: """Initialize device and Construct model runner""" @@ -842,7 +859,7 @@ def run_worker_proc() -> None: worker_proc.initialize_kv_cache() # Trigger CUDAGraph capture - worker_proc.worker.graph_optimize_and_warm_up_model() + worker_proc.graph_optimize_and_warm_up_model() # Initialize health status worker_proc.init_health_status()