diff --git a/fastdeploy/model_executor/models/glm4_moe.py b/fastdeploy/model_executor/models/glm4_moe.py index e9a272e4c..ea56e9a47 100644 --- a/fastdeploy/model_executor/models/glm4_moe.py +++ b/fastdeploy/model_executor/models/glm4_moe.py @@ -160,7 +160,7 @@ class Glm4Moe(nn.Layer): out = out + shared_experts_out # We do to TP all reduce after the sum of experts. if self.tensor_parallel_size > 1: - tensor_model_parallel_all_reduce(out) + tensor_model_parallel_all_reduce(out, self.tp_group) return out