From 4dbaa3d74c2dbe7e09bcae29a620a8c8508533ed Mon Sep 17 00:00:00 2001 From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com> Date: Mon, 11 Aug 2025 21:02:42 +0800 Subject: [PATCH] Fix w4a8 scale load (#3334) * fix_eplb * fix eplb part3 * support_fp8_rope3d * fix w4a8 scale --- .../model_executor/layers/moe/fused_moe_cutlass_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py index 1ba684f65..1e85421fe 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py @@ -361,7 +361,7 @@ class CutlassW4A8MoEMethod(CutlassMoEMethod): if layer.ep_size > 1: for expert_idx in ep_rank_to_expert_id_list: scale_tensor = get_tensor( - get_tensor( + ( state_dict[up_gate_proj_expert_in_scale_key.format(expert_idx)] if up_gate_proj_expert_in_scale_key.format(expert_idx) in state_dict else up_gate_proj_expert_in_scale_key.format(expert_idx)