mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[fix] fix clearing caches synchronization and add more logs (#4212)
* [fix] fix clearing caches synchronization and add more logs * [chore] print cache_ready_signal in log
This commit is contained in:
@@ -1028,12 +1028,12 @@ class GPUModelRunner(ModelRunnerBase):
         create_cache_tensor = profile or self.parallel_config.splitwise_role == "mixed"

         if not create_cache_tensor:
-            logger.info("Waiting for cache managers to create kv cache..")
+            logger.info(f"Waiting for cache managers to create kv cache.. {cache_ready_signal.value}")
             while cache_ready_signal.value[self.local_rank] != 1:
                 time.sleep(1)
-            logger.info("OK! Stop waiting.")
+            logger.info(f"OK! Stop waiting. {cache_ready_signal.value}")

-        logger.info("Initializing kv cache for all layers.")
+        logger.info(f"Initializing kv cache for all layers. {cache_ready_signal.value}")
         cache_kvs_list = []
         for i in range(self.model_config.num_hidden_layers):
             key_cache_name = f"key_caches_{i}_rank{local_rank}.device{self.device_id}"
@@ -1054,8 +1054,8 @@ class GPUModelRunner(ModelRunnerBase):
         self.share_inputs["caches"] = cache_kvs_list

         if not profile and create_cache_tensor:
-            logger.info("✅ kv cache is ready!")
             cache_ready_signal.value[self.local_rank] = 1
+            logger.info(f"✅ kv cache is ready! {cache_ready_signal.value}")

         paddle.device.cuda.empty_cache()
||||
|
Reference in New Issue
Block a user