[fix] fix cache-clearing synchronization and add more logs (#4212)

* [fix] fix cache-clearing synchronization and add more logs

* [chore] print cache_ready_signal in log
This commit is contained in:
李泳桦
2025-09-23 19:36:38 +08:00
committed by GitHub
parent de4feff147
commit cb8d87b945
3 changed files with 51 additions and 22 deletions

View File

@@ -1028,12 +1028,12 @@ class GPUModelRunner(ModelRunnerBase):
 create_cache_tensor = profile or self.parallel_config.splitwise_role == "mixed"
 if not create_cache_tensor:
-logger.info("Waiting for cache managers to create kv cache..")
+logger.info(f"Waiting for cache managers to create kv cache.. {cache_ready_signal.value}")
 while cache_ready_signal.value[self.local_rank] != 1:
 time.sleep(1)
-logger.info("OK! Stop waiting.")
+logger.info(f"OK! Stop waiting. {cache_ready_signal.value}")
-logger.info("Initializing kv cache for all layers.")
+logger.info(f"Initializing kv cache for all layers. {cache_ready_signal.value}")
 cache_kvs_list = []
 for i in range(self.model_config.num_hidden_layers):
 key_cache_name = f"key_caches_{i}_rank{local_rank}.device{self.device_id}"
@@ -1054,8 +1054,8 @@ class GPUModelRunner(ModelRunnerBase):
 self.share_inputs["caches"] = cache_kvs_list
 if not profile and create_cache_tensor:
-logger.info("✅ kv cache is ready!")
 cache_ready_signal.value[self.local_rank] = 1
+logger.info(f"✅ kv cache is ready! {cache_ready_signal.value}")
 paddle.device.cuda.empty_cache()