fix the bug when num_key_value_heads < tensor_parallel_size (#3722)

Author: Zero Rains
Date: 2025-08-30 12:40:29 +08:00
Committed by: GitHub
Parent: 3364af767b
Commit: 64cf769bee

@@ -158,6 +158,7 @@ class PrefixCacheManager:
             kv_num_head = int(cache_config.model_cfg.num_key_value_heads) // tensor_parallel_size
         else:
             kv_num_head = cache_config.model_cfg.num_attention_heads // tensor_parallel_size
+        kv_num_head = max(1, kv_num_head)
         cache_ready_signal_data = np.zeros(shape=[tensor_parallel_size], dtype=np.int32)
         self.cache_ready_signal = IPCSignal(
             name="cache_ready_signal",