From bde97e09f7817696d50e17e57da029f84cba526e Mon Sep 17 00:00:00 2001
From: Sunny-bot1 <68891411+Sunny-bot1@users.noreply.github.com>
Date: Wed, 19 Nov 2025 21:11:16 +0800
Subject: [PATCH] support dynamic activation quant for w4afp8 (#5117)

---
 .../model_executor/layers/moe/fused_moe_cutlass_backend.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
index 1e706a073..5736f6fda 100644
--- a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
+++ b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
@@ -1090,7 +1090,7 @@ class CutlassW4AFP8MoEMethod(CutlassMoEMethod):
             "down_proj_in_scale": weight_key_map.get("down_proj_expert_in_scale_key", None),
         }
         for name, value in scale_key_map.items():
-            if value is None:
+            if hasattr(layer, name) and value is None:
                 raise ValueError(f"scale {name} should not be none in w4a8 mode.")
 
         # 2. Extract scale tensor from state dict
@@ -1111,8 +1111,9 @@ class CutlassW4AFP8MoEMethod(CutlassMoEMethod):
 
         for expert_idx in logical_expert_ids:
             for name, scale_key_template in scale_key_map.items():
-                scale_tensor = _extract_scale_tensor(layer, state_dict, scale_key_template, expert_idx)
-                scale_weight_map[name].append(scale_tensor)
+                if hasattr(layer, name):
+                    scale_tensor = _extract_scale_tensor(layer, state_dict, scale_key_template, expert_idx)
+                    scale_weight_map[name].append(scale_tensor)
 
         for i, weight_scale_name in enumerate(["up_gate_proj_weight_scale", "down_proj_weight_scale"]):
             in_scale_name = weight_scale_name.replace("_weight_scale", "_in_scale")
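
Not part of the patch itself: below is a minimal, self-contained sketch of the hasattr guard this diff introduces. The intent, as the diff suggests, is that a layer using dynamic activation quantization does not carry the static *_in_scale attributes, so those scale entries are skipped during loading instead of failing. All names here (the SimpleNamespace stand-in, collect_scales, the key templates) are hypothetical illustrations, not FastDeploy APIs.

from types import SimpleNamespace

# Hypothetical scale-key templates; only static activation quant defines the *_in_scale keys.
scale_key_map = {
    "up_gate_proj_weight_scale": "experts.{}.up_gate_proj.weight_scale",
    "down_proj_weight_scale": "experts.{}.down_proj.weight_scale",
    "up_gate_proj_in_scale": "experts.{}.up_gate_proj.in_scale",  # static act quant only
    "down_proj_in_scale": "experts.{}.down_proj.in_scale",        # static act quant only
}

def collect_scales(layer, state_dict, expert_ids):
    """Gather per-expert scales, skipping any scale the layer does not define."""
    out = {name: [] for name in scale_key_map if hasattr(layer, name)}
    for expert_idx in expert_ids:
        for name, key_template in scale_key_map.items():
            if not hasattr(layer, name):
                continue  # dynamic activation quant: no static in_scale attribute to fill
            out[name].append(state_dict[key_template.format(expert_idx)])
    return out

# A "dynamic activation quant" layer only carries weight scales, no in-scales.
dynamic_layer = SimpleNamespace(up_gate_proj_weight_scale=None, down_proj_weight_scale=None)
state_dict = {k.format(i): float(i) for k in scale_key_map.values() for i in range(2)}
print(collect_scales(dynamic_layer, state_dict, range(2)))
# Only the two *_weight_scale lists are populated; the *_in_scale entries are skipped,
# mirroring how the guarded loop above leaves missing in-scales alone instead of raising.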