[Feature] support tensor-parallel-size>num_key_value_heads for qwen3 (#2799)

zhink authored 2025-07-11 15:09:43 +08:00, committed by GitHub
parent 2c3607407f
commit c08561c13a
4 changed files with 23 additions and 99 deletions


@@ -711,9 +711,9 @@ class GPUModelRunner(ModelRunnerBase):
         assert len(self.attn_backends) == 0
         num_heads = self.model_config.num_attention_heads // self.parallel_config.tensor_parallel_degree
-        self.model_config.kv_num_heads = int(
+        self.model_config.kv_num_heads = max(1, int(
             self.model_config.num_key_value_heads
-        ) // self.parallel_config.tensor_parallel_degree
+        ) // self.parallel_config.tensor_parallel_degree)
         head_dim = self.model_config.head_dim
         # Get the attention backend
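
For context, a minimal standalone sketch of the behaviour this diff changes (the helper names and example head counts are illustrative, not part of the repository's API): with plain integer division, the per-rank KV head count drops to 0 as soon as tensor_parallel_degree exceeds num_key_value_heads, while the new max(1, ...) clamps each rank to at least one KV head.

def kv_heads_per_rank_old(num_key_value_heads: int, tensor_parallel_degree: int) -> int:
    # Previous behaviour: plain integer division, which yields 0 once
    # tensor_parallel_degree > num_key_value_heads.
    return int(num_key_value_heads) // tensor_parallel_degree

def kv_heads_per_rank_new(num_key_value_heads: int, tensor_parallel_degree: int) -> int:
    # Patched behaviour: clamp to at least one KV head per rank.
    return max(1, int(num_key_value_heads) // tensor_parallel_degree)

if __name__ == "__main__":
    # Example: a Qwen3-style config with 8 KV heads across growing TP sizes.
    for tp in (4, 8, 16, 32):
        print(f"tp={tp:<3} old={kv_heads_per_rank_old(8, tp)} new={kv_heads_per_rank_new(8, tp)}")

For tp=16 and tp=32 the old expression returns 0 and the new one returns 1; keeping at least one KV head per rank (effectively replicating KV heads across the extra ranks) is what allows tensor-parallel-size > num_key_value_heads, as the commit title describes.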