fix_fp8_deepgemm_moe_tp_bug (#3658)

This commit is contained in:
Liumengyuan
2025-08-28 17:19:02 +08:00
committed by GitHub
parent e93d4cfcdd
commit 2a73a6df03


@@ -49,13 +49,13 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
         ]
         self.up_gate_proj_scale_shape = [
             layer.num_local_experts,
-            layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-            layer.hidden_size // self.quant_config.weight_block_size[1],
+            ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+            ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
         ]
         self.down_proj_scale_shape = [
             layer.num_local_experts,
-            layer.hidden_size // self.quant_config.weight_block_size[0],
-            layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+            ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+            ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
         ]
         if self.quant_config.is_checkpoint_bf16:
             layer.up_gate_proj_weight = layer.create_parameter(
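
The fix swaps floor division for ceil_div when sizing the FP8 block-wise scale tensors. Under tensor parallelism the per-rank moe_intermediate_size need not be a multiple of weight_block_size, and floor division then allocates one scale block too few for the partially filled last block. A minimal sketch of the arithmetic, using hypothetical sizes (the per-rank size of 1440 and block size of 128 are illustrative assumptions, not values from this diff; ceil_div mirrors the helper called in the new code):

    def ceil_div(a: int, b: int) -> int:
        # Round up, so a dimension that only partially fills the last
        # quantization block still gets a scale entry for that block.
        return (a + b - 1) // b

    # Hypothetical per-rank dimension after a tensor-parallel split.
    per_rank_intermediate_size = 1440
    weight_block_size = 128

    print(per_rank_intermediate_size // weight_block_size)          # 11 -> one block short
    print(ceil_div(per_rank_intermediate_size, weight_block_size))  # 12 -> covers the tail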