Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 08:37:06 +08:00
Adapt Qwen3 MoE config attributes for init (#3066)
Adapt Qwen3 MoE config attributes for init
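In short, as read from the two hunks below: Qwen3DecoderLayer.__init__ and Qwen3MoePretrainedModel now read fd_config.model_config.num_experts and fd_config.model_config.num_experts_per_tok instead of the old moe_num_experts and moe_topk attributes, and the MoE gate condition is collapsed to a single line. The new names match the corresponding Hugging Face Qwen3-MoE config fields, which is presumably the motivation, though the commit message does not say so.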
@@ -97,29 +97,26 @@ class Qwen3DecoderLayer(nn.Layer):
         prefix: str = "",
     ) -> None:
         super().__init__()
-        layer_id = int(prefix.split(sep=".")[-1])
+
+        layer_id = int(prefix.split(sep=".")[-1])
         self.self_attn = Qwen3Attention(
             fd_config=fd_config,
             layer_id=layer_id,
             prefix=f"{prefix}.self_attn",
         )
 
         weight_key_map = {
             "gate_weight_key": f"{prefix}.mlp.gate.weight",
             "up_gate_proj_expert_weight_key": f"{prefix}.mlp.experts.{{}}.up_gate_proj.weight",
             "down_proj_expert_weight_key": f"{prefix}.mlp.experts.{{}}.down_proj.weight",
         }
 
-        if (
-            fd_config.model_config.moe_num_experts is not None
-            and layer_id >= fd_config.model_config.moe_layer_start_index
-        ):
-
+        if fd_config.model_config.num_experts is not None and layer_id >= fd_config.model_config.moe_layer_start_index:
             self.mlp = FusedMoE(
                 fd_config,
                 moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
-                num_experts=fd_config.model_config.moe_num_experts,
-                top_k=fd_config.model_config.moe_topk,
+                num_experts=fd_config.model_config.num_experts,
+                top_k=fd_config.model_config.num_experts_per_tok,
                 layer_idx=layer_id,
                 weight_key_map=weight_key_map,
             )
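Two details in this hunk are easy to misread. The doubled braces in the weight_key_map f-strings are escapes, so each value keeps a literal {} placeholder that a consumer (presumably FusedMoE; how FastDeploy fills it is not shown in this diff) can format with an expert index, and layer_id is parsed from the trailing component of the prefix. A minimal sketch, using a made-up prefix and expert index:

# Sketch only: "model.layers.3" and expert index 7 are hypothetical values.
prefix = "model.layers.3"

# Doubled braces in an f-string escape to a literal "{}".
up_gate_key = f"{prefix}.mlp.experts.{{}}.up_gate_proj.weight"
print(up_gate_key)            # model.layers.3.mlp.experts.{}.up_gate_proj.weight
print(up_gate_key.format(7))  # model.layers.3.mlp.experts.7.up_gate_proj.weight

# layer_id comes from the last dot-separated component of the prefix.
layer_id = int(prefix.split(sep=".")[-1])
assert layer_id == 3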
@@ -386,12 +383,12 @@ class Qwen3MoePretrainedModel(PretrainedModel):
             return final_actions
 
         num_experts = 0
-        if isinstance(config.moe_num_experts, list):
-            num_experts = sum(config.moe_num_experts)
-        elif isinstance(config.moe_num_experts, int):
-            num_experts = config.moe_num_experts
+        if isinstance(config.num_experts, list):
+            num_experts = sum(config.num_experts)
+        elif isinstance(config.num_experts, int):
+            num_experts = config.num_experts
         else:
-            raise ValueError(f"Not support type of num_experts [{type(config.moe_num_experts)}]")
+            raise ValueError(f"Not support type of num_experts [{type(config.num_experts)}]")
 
         mappings = get_tensor_parallel_split_mappings(config.num_hidden_layers, num_experts)
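The normalization in this hunk accepts num_experts as either a single int or a per-group list that gets summed. A standalone sketch of the same logic, with SimpleNamespace as a stand-in for the real config object (not FastDeploy's actual config class):

from types import SimpleNamespace

def resolve_num_experts(config) -> int:
    # Mirrors the hunk above: num_experts may be a list (summed) or an int.
    if isinstance(config.num_experts, list):
        return sum(config.num_experts)
    elif isinstance(config.num_experts, int):
        return config.num_experts
    else:
        raise ValueError(f"Unsupported type for num_experts: {type(config.num_experts)}")

print(resolve_num_experts(SimpleNamespace(num_experts=128)))       # 128
print(resolve_num_experts(SimpleNamespace(num_experts=[64, 64])))  # 128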