Fix parameter shapes in the block-wise FP8 MoE v1 loader: use the loaded weight/scale tensor shapes (#4385)

2025-12-24 13:28:13 +08:00 · 2025-10-15 14:23:38 +08:00
parent e0946ae128
commit adeee84dd6
1 changed file with 2 additions and 10 deletions
--- a/fastdeploy/model_executor/layers/moe/fused_moe_triton_backend.py
+++ b/fastdeploy/model_executor/layers/moe/fused_moe_triton_backend.py
@@ -1229,11 +1229,7 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
            layer,
            weight_name,
            layer.create_parameter(
-                shape=[
-                    layer.num_local_experts,
-                    ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
-                    ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
-                ],
+                shape=weight.shape,
                dtype=weight_dtype,
                default_initializer=paddle.nn.initializer.Constant(0),
            ),
@@ -1243,11 +1239,7 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
            layer,
            scale_name,
            layer.create_parameter(
-                shape=[
-                    layer.num_local_experts,
-                    ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
-                    ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
-                ],
+                shape=scale.shape,
                dtype=scale_dtype,
                default_initializer=paddle.nn.initializer.Constant(0),
            ),