From 64cf769bee0d43fbd06cc41b87cd3d4a8fdf8f6f Mon Sep 17 00:00:00 2001
From: Zero Rains
Date: Sat, 30 Aug 2025 12:40:29 +0800
Subject: [PATCH] fix the bug when num_key_value_heads < tensor_parallel_size (#3722)

---
 fastdeploy/cache_manager/prefix_cache_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py
index 1eccb0047..0293e398b 100644
--- a/fastdeploy/cache_manager/prefix_cache_manager.py
+++ b/fastdeploy/cache_manager/prefix_cache_manager.py
@@ -158,7 +158,7 @@ class PrefixCacheManager:
             kv_num_head = int(cache_config.model_cfg.num_key_value_heads) // tensor_parallel_size
         else:
             kv_num_head = cache_config.model_cfg.num_attention_heads // tensor_parallel_size
-
+        kv_num_head = max(1, kv_num_head)
         cache_ready_signal_data = np.zeros(shape=[tensor_parallel_size], dtype=np.int32)
         self.cache_ready_signal = IPCSignal(
             name="cache_ready_signal",