From f206474cc7c70100e5cabb0154fc5ae7ac1539ec Mon Sep 17 00:00:00 2001 From: Zero Rains Date: Sat, 30 Aug 2025 12:40:00 +0800 Subject: [PATCH] fix the bug when num_key_value_heads < tensor_parallel_size (#3717) --- fastdeploy/cache_manager/prefix_cache_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py index e57f0f43b..0fc04a95a 100644 --- a/fastdeploy/cache_manager/prefix_cache_manager.py +++ b/fastdeploy/cache_manager/prefix_cache_manager.py @@ -154,7 +154,7 @@ class PrefixCacheManager: kv_num_head = int(cache_config.model_cfg.num_key_value_heads) // tensor_parallel_size else: kv_num_head = cache_config.model_cfg.num_attention_heads // tensor_parallel_size - + kv_num_head = max(1, kv_num_head) cache_ready_signal_data = np.zeros(shape=[tensor_parallel_size], dtype=np.int32) self.cache_ready_signal = IPCSignal( name="cache_ready_signal",