diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py
index f04b35b7b..e95829819 100644
--- a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py
+++ b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py
@@ -49,13 +49,13 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
         ]
         self.up_gate_proj_scale_shape = [
             layer.num_local_experts,
-            layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-            layer.hidden_size // self.quant_config.weight_block_size[1],
+            ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+            ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
         ]
         self.down_proj_scale_shape = [
             layer.num_local_experts,
-            layer.hidden_size // self.quant_config.weight_block_size[0],
-            layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+            ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+            ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
         ]
         if self.quant_config.is_checkpoint_bf16:
             layer.up_gate_proj_weight = layer.create_parameter(
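
The change replaces floor division with a ceiling division when sizing the per-block quantization scale tensors: when `hidden_size` or `moe_intermediate_size` is not an exact multiple of `weight_block_size`, `//` drops the trailing partial block and the scale tensor ends up one row or column short. Below is a minimal sketch of the rounding difference, assuming the common `ceil_div(a, b) == (a + b - 1) // b` definition (the exact helper used in FastDeploy may differ); the dimension values are illustrative only.

```python
def ceil_div(a: int, b: int) -> int:
    # Round up instead of down so a final partial block still gets a scale entry.
    return (a + b - 1) // b

block = 128  # example weight_block_size entry

# Exact multiple: both give the same block count.
print(5120 // block, ceil_div(5120, block))  # -> 40 40

# Non-multiple: floor division loses the last partial block.
print(4000 // block, ceil_div(4000, block))  # -> 31 32
```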