From 94a55fc1586c3f01df7d0d22c0d0fc5523fdcf69 Mon Sep 17 00:00:00 2001 From: liyonghua0910 Date: Fri, 12 Sep 2025 19:30:37 +0800 Subject: [PATCH] [fix] fix prefix caching not enabled --- fastdeploy/cache_manager/prefix_cache_manager.py | 2 +- fastdeploy/engine/args_utils.py | 1 + fastdeploy/engine/engine.py | 10 +--------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py index 03dffe906..14c7bcb00 100644 --- a/fastdeploy/cache_manager/prefix_cache_manager.py +++ b/fastdeploy/cache_manager/prefix_cache_manager.py @@ -230,7 +230,7 @@ class PrefixCacheManager: while np.sum(self.cache_ready_signal.value) != tensor_parallel_size: time.sleep(1) - if cache_config.swap_space is not None and cache_config.swap_space > 0: + if cache_config.enable_hierarchical_cache and self.num_cpu_blocks > 0: while np.sum(self.swap_space_ready_signal.value) != tensor_parallel_size: time.sleep(1) diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index 4f0ee57cb..2a1c9dc5a 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -385,6 +385,7 @@ class EngineArgs: """ Post-initialization processing to set default tokenizer if not provided. """ + from fastdeploy.utils import llm_logger if not self.tokenizer: self.tokenizer = self.model if self.enable_logprob: diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py index 3905b3fe6..f8f17eda4 100644 --- a/fastdeploy/engine/engine.py +++ b/fastdeploy/engine/engine.py @@ -163,15 +163,7 @@ class LLMEngine: self._stop_profile() elif self.cfg.cache_config.enable_prefix_caching: device_ids = self.cfg.device_ids.split(",") - self.cache_manager_processes = self.resource_manager.cache_manager.launch_cache_manager( - cache_config=self.cfg.cache_config, - tensor_parallel_size=self.cfg.tensor_parallel_size, - device_ids=device_ids, - pod_ip=self.cfg.master_ip, - engine_worker_queue_port=self.cfg.engine_worker_queue_port, - pid_suffix=self.ipc_signal_suffix, - create_cache_tensor=False, - ) + self.cache_manager_processes = self.engine.start_cache_service(device_ids, self.ipc_signal_suffix, False) # Launch components: scheduler, cache_manager, expert_service et.al. self.launch_components()