Support MoE offline quantization (#5142)

2025-12-24 13:28:13 +08:00 · 2025-11-24 18:59:18 +08:00
parent 5ff93d4998
commit e150a418d4
5 changed files with 12 additions and 3 deletions
--- a/fastdeploy/model_executor/models/ernie4_5_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_moe.py
@@ -138,7 +138,8 @@ class Ernie4_5_MoE(nn.Layer):
                "down_proj_expert_code_zp_key": f"{prefix}.experts.{{}}.down_proj.code_zp",
            }
        elif moe_quant_type == "tensor_wise_fp8" or (
-            moe_quant_type == "block_wise_fp8" and fd_config.model_config.is_quantized
+            moe_quant_type == "block_wise_fp8"
+            and (fd_config.model_config.is_quantized or fd_config.model_config.is_moe_quantized)
        ):
            weight_key_map = {
                "gate_weight_key": f"{prefix}.gate.weight",