Support MoE offline quantization (#5142)

This commit is contained in:
xiaoxiaohehe001
2025-11-24 18:59:18 +08:00
committed by GitHub
parent 5ff93d4998
commit e150a418d4
5 changed files with 12 additions and 3 deletions

View File

@@ -138,7 +138,8 @@ class Ernie4_5_MoE(nn.Layer):
"down_proj_expert_code_zp_key": f"{prefix}.experts.{{}}.down_proj.code_zp",
}
elif moe_quant_type == "tensor_wise_fp8" or (
- moe_quant_type == "block_wise_fp8" and fd_config.model_config.is_quantized
+ moe_quant_type == "block_wise_fp8"
+ and (fd_config.model_config.is_quantized or fd_config.model_config.is_moe_quantized)
):
weight_key_map = {
"gate_weight_key": f"{prefix}.gate.weight",