diff --git a/fastdeploy/cache_manager/cache_messager.py b/fastdeploy/cache_manager/cache_messager.py index e06d05a67..456ba1c34 100644 --- a/fastdeploy/cache_manager/cache_messager.py +++ b/fastdeploy/cache_manager/cache_messager.py @@ -142,7 +142,7 @@ class CacheMessager: self.gpu_id = gpu_id self.cache_info = dict() - self.dp_rank_id = local_data_parallel_id + self.dp_rank_id = self.rank + local_data_parallel_id * self.nranks layerwise_send_cache_thread = threading.Thread(target=self._prefill_layerwise_send_cache_thread) layerwise_send_cache_thread.daemon = True