mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[fix] fix prefix caching not enabled
This commit is contained in:
@@ -230,7 +230,7 @@ class PrefixCacheManager:
|
||||
while np.sum(self.cache_ready_signal.value) != tensor_parallel_size:
|
||||
time.sleep(1)
|
||||
|
||||
if cache_config.swap_space is not None and cache_config.swap_space > 0:
|
||||
if cache_config.enable_hierarchical_cache and self.num_cpu_blocks > 0:
|
||||
while np.sum(self.swap_space_ready_signal.value) != tensor_parallel_size:
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@@ -385,6 +385,7 @@ class EngineArgs:
|
||||
"""
|
||||
Post-initialization processing to set default tokenizer if not provided.
|
||||
"""
|
||||
from fastdeploy.utils import llm_logger
|
||||
if not self.tokenizer:
|
||||
self.tokenizer = self.model
|
||||
if self.enable_logprob:
|
||||
|
||||
@@ -163,15 +163,7 @@ class LLMEngine:
|
||||
self._stop_profile()
|
||||
elif self.cfg.cache_config.enable_prefix_caching:
|
||||
device_ids = self.cfg.device_ids.split(",")
|
||||
self.cache_manager_processes = self.resource_manager.cache_manager.launch_cache_manager(
|
||||
cache_config=self.cfg.cache_config,
|
||||
tensor_parallel_size=self.cfg.tensor_parallel_size,
|
||||
device_ids=device_ids,
|
||||
pod_ip=self.cfg.master_ip,
|
||||
engine_worker_queue_port=self.cfg.engine_worker_queue_port,
|
||||
pid_suffix=self.ipc_signal_suffix,
|
||||
create_cache_tensor=False,
|
||||
)
|
||||
self.cache_manager_processes = self.engine.start_cache_service(device_ids, self.ipc_signal_suffix, False)
|
||||
|
||||
# Launch components: scheduler, cache_manager, expert_service, etc.
|
||||
self.launch_components()
|
||||
|
||||
Reference in New Issue
Block a user