[fix] fix prefix caching not enabled

This commit is contained in:
liyonghua0910
2025-09-12 19:30:37 +08:00
parent c7b8f4f8c6
commit 94a55fc158
3 changed files with 3 additions and 10 deletions

View File

@@ -230,7 +230,7 @@ class PrefixCacheManager:
while np.sum(self.cache_ready_signal.value) != tensor_parallel_size:
time.sleep(1)
if cache_config.swap_space is not None and cache_config.swap_space > 0:
if cache_config.enable_hierarchical_cache and self.num_cpu_blocks > 0:
while np.sum(self.swap_space_ready_signal.value) != tensor_parallel_size:
time.sleep(1)

View File

@@ -385,6 +385,7 @@ class EngineArgs:
"""
Post-initialization processing to set default tokenizer if not provided.
"""
from fastdeploy.utils import llm_logger
if not self.tokenizer:
self.tokenizer = self.model
if self.enable_logprob:

View File

@@ -163,15 +163,7 @@ class LLMEngine:
self._stop_profile()
elif self.cfg.cache_config.enable_prefix_caching:
device_ids = self.cfg.device_ids.split(",")
self.cache_manager_processes = self.resource_manager.cache_manager.launch_cache_manager(
cache_config=self.cfg.cache_config,
tensor_parallel_size=self.cfg.tensor_parallel_size,
device_ids=device_ids,
pod_ip=self.cfg.master_ip,
engine_worker_queue_port=self.cfg.engine_worker_queue_port,
pid_suffix=self.ipc_signal_suffix,
create_cache_tensor=False,
)
self.cache_manager_processes = self.engine.start_cache_service(device_ids, self.ipc_signal_suffix, False)
# Launch components: scheduler, cache_manager, expert_service, etc.
self.launch_components()