Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-04 16:22:57 +08:00.
Simplify the Config code (#2770)
* simplify the code
* fix vl
* delete config
* fix
* perfect code
* fix ci
* fix xpu
* fix xpu
* fix server
* resolve conflict
* fix mtp
* resolve conflict
* fix xpu
* fix xpu
* fix vl
* fix log
* fix qwen moe
* fix qwen moe
* fix qwen moe
This commit is contained in:
@@ -165,7 +165,7 @@ class Ernie4_5_MoeForCausalLMRL(Ernie4_5_MoeForCausalLM):
             infer_to_train[f"{infer_base_name}.{layer_idx}.mlp.fused_moe.gate_weight"] = \
                 f"{train_base_name}.{layer_idx}.mlp.gate.weight"

-            if self.fd_config.moe_config.moe_use_aux_free:
+            if self.fd_config.model_config.moe_use_aux_free:
                 infer_to_train[f"{infer_base_name}.{layer_idx}.mlp.fused_moe.gate_correction_bias"] = \
                     f"{train_base_name}.{layer_idx}.mlp.moe_statics.e_score_correction_bias"

@@ -178,7 +178,7 @@ class Ernie4_5_MoeForCausalLMRL(Ernie4_5_MoeForCausalLM):
                 f"{train_base_name}.{layer_idx}.mlp.shared_experts.down_proj.weight"

             # MoE experts mappings
-            for expert_idx in range(self.fd_config.moe_config.num_experts):
+            for expert_idx in range(self.fd_config.model_config.moe_num_experts):
                 for ph in place_holders:
                     # FFN1 (up_gate_proj)
                     ffn1_key = f"{infer_base_name}.{layer_idx}.mlp.fused_moe.moe_ffn1_weight"
@@ -198,12 +198,12 @@ class Ernie4_5_MoeForCausalLMRL(Ernie4_5_MoeForCausalLM):

         # Process non-MoE layers
         for layer_idx in range(
-                self.fd_config.moe_config.moe_layer_start_index):
+                self.fd_config.model_config.moe_layer_start_index):
             _add_layer_mappings(layer_idx, is_moe_layer=False)

         # Process MoE layers
-        for layer_idx in range(self.fd_config.moe_config.moe_layer_start_index,
-                               self.fd_config.model_config.num_layers):
+        for layer_idx in range(self.fd_config.model_config.moe_layer_start_index,
+                               self.fd_config.model_config.num_hidden_layers):
             _add_layer_mappings(layer_idx, is_moe_layer=True)

         return infer_to_train
@@ -278,7 +278,7 @@ class Qwen2ForCausalLMRL(Qwen2ForCausalLM):
                     f"{train_base_name}.{layer_idx}.mlp.down_proj.{ph}"

         for layer_idx in range(
-                self.fd_config.model_config.num_layers):
+                self.fd_config.model_config.num_hidden_layers):
             _add_layer_mappings(layer_idx)

         return infer_to_train
@@ -396,7 +396,7 @@ class Qwen3MoeForCausalLMRL(Qwen3MoeForCausalLM):
             )

         # Process MoE layers
-        for layer_idx in range(self.fd_config.model_config.num_layers):
+        for layer_idx in range(self.fd_config.model_config.num_hidden_layers):
             _add_layer_mappings(layer_idx, is_moe_layer=True)

         return infer_to_train
|
Reference in New Issue
Block a user