Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
fix glm all_reduce tp group (#4187)
@@ -157,7 +157,7 @@ class Glm4Moe(nn.Layer):
         out = out + shared_experts_out
         # We do to TP all reduce after the sum of experts.
         if self.tensor_parallel_size > 1:
-            tensor_model_parallel_all_reduce(out)
+            tensor_model_parallel_all_reduce(out, self.tp_group)
         return out
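The change passes the model's tensor-parallel process group explicitly instead of relying on an implicit default group, so the reduction only spans the TP ranks. Below is a minimal sketch (not FastDeploy's implementation) of the same pattern written directly against paddle.distributed; the function and variable names are illustrative, and it assumes `tp_group` is a process group created over the tensor-parallel ranks only.

```python
# Illustrative sketch of the post-MoE all-reduce over a tensor-parallel group.
# Assumes tp_group was built with paddle.distributed.new_group() over the TP ranks.
import paddle
import paddle.distributed as dist

def moe_output_all_reduce(routed_out, shared_experts_out, tp_group, tensor_parallel_size):
    # Each TP rank holds a partial sum of the expert outputs.
    out = routed_out + shared_experts_out
    if tensor_parallel_size > 1:
        # Reducing over tp_group (rather than the default/global group) keeps
        # the sum inside the TP ranks, so data-parallel or expert-parallel
        # replicas are not mixed into the result.
        dist.all_reduce(out, group=tp_group)
    return out
```

Passing the group explicitly matters once the process mesh has more than one parallel dimension: with the default group, the reduce would span every launched rank, not just the tensor-parallel shard owners.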
|