Simplify the Config code (#2770)

* simplify the code

* fix vl

* delete config

* fix

* perfect the code

* fix ci

* fix xpu

* fix xpu

* fix server

* resolve conflict

* fix mtp

* resolve conflict

* fix xpu

* fix xpu

* fix vl

* fix log

* fix qwen moe

* fix qwen moe

* fix qwen moe
Author: YuanRisheng
Date: 2025-07-14 19:50:05 +08:00
Committed by: GitHub
Parent: 2e81792d64
Commit: 4c7b8bc458
34 changed files with 551 additions and 911 deletions
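
At a glance, the refactor folds the MoE settings that previously lived on fd_config.moe_config into fd_config.model_config and renames num_layers to num_hidden_layers. A minimal sketch of the new access pattern, using simplified dataclass stand-ins (the real config classes carry many more fields; attribute names are taken from the diff below):

from dataclasses import dataclass

@dataclass
class ModelConfig:
    num_hidden_layers: int      # previously model_config.num_layers
    moe_num_experts: int        # previously moe_config.num_experts
    moe_layer_start_index: int  # previously on moe_config
    moe_use_aux_free: bool      # previously on moe_config

@dataclass
class FDConfig:
    model_config: ModelConfig   # moe_config is gone; everything lives here

cfg = FDConfig(model_config=ModelConfig(
    num_hidden_layers=28,
    moe_num_experts=64,
    moe_layer_start_index=1,
    moe_use_aux_free=True,
))
print(cfg.model_config.moe_num_experts)  # 64

Collapsing the two objects gives the weight-mapping code below a single source of truth for MoE settings.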


@@ -165,7 +165,7 @@ class Ernie4_5_MoeForCausalLMRL(Ernie4_5_MoeForCausalLM):
             infer_to_train[f"{infer_base_name}.{layer_idx}.mlp.fused_moe.gate_weight"] = \
                 f"{train_base_name}.{layer_idx}.mlp.gate.weight"
-            if self.fd_config.moe_config.moe_use_aux_free:
+            if self.fd_config.model_config.moe_use_aux_free:
                 infer_to_train[f"{infer_base_name}.{layer_idx}.mlp.fused_moe.gate_correction_bias"] = \
                     f"{train_base_name}.{layer_idx}.mlp.moe_statics.e_score_correction_bias"
@@ -178,7 +178,7 @@ class Ernie4_5_MoeForCausalLMRL(Ernie4_5_MoeForCausalLM):
                 f"{train_base_name}.{layer_idx}.mlp.shared_experts.down_proj.weight"
             # MoE experts mappings
-            for expert_idx in range(self.fd_config.moe_config.num_experts):
+            for expert_idx in range(self.fd_config.model_config.moe_num_experts):
                 for ph in place_holders:
                     # FFN1 (up_gate_proj)
                     ffn1_key = f"{infer_base_name}.{layer_idx}.mlp.fused_moe.moe_ffn1_weight"
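
Each MoE layer then gets one mapping per expert index. A stripped-down sketch of that inner loop; note the training-side expert names here are assumptions for illustration, not taken from the diff:

# Hypothetical sketch of the per-expert loop; training-side names are assumed.
infer_base_name = "ernie.layers"
train_base_name = "ernie.layers"
layer_idx = 1
moe_num_experts = 2          # fd_config.model_config.moe_num_experts in the real code
place_holders = ["weight"]   # assumed placeholder list

infer_to_train = {}
for expert_idx in range(moe_num_experts):
    for ph in place_holders:
        # FFN1 (up_gate_proj): every expert maps onto one fused inference weight.
        ffn1_key = f"{infer_base_name}.{layer_idx}.mlp.fused_moe.moe_ffn1_weight"
        infer_to_train.setdefault(ffn1_key, []).append(
            f"{train_base_name}.{layer_idx}.mlp.experts.{expert_idx}.up_gate_proj.{ph}")

print(infer_to_train)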
@@ -198,12 +198,12 @@ class Ernie4_5_MoeForCausalLMRL(Ernie4_5_MoeForCausalLM):
         # Process non-MoE layers
         for layer_idx in range(
-                self.fd_config.moe_config.moe_layer_start_index):
+                self.fd_config.model_config.moe_layer_start_index):
             _add_layer_mappings(layer_idx, is_moe_layer=False)
         # Process MoE layers
-        for layer_idx in range(self.fd_config.moe_config.moe_layer_start_index,
-                               self.fd_config.model_config.num_layers):
+        for layer_idx in range(self.fd_config.model_config.moe_layer_start_index,
+                               self.fd_config.model_config.num_hidden_layers):
             _add_layer_mappings(layer_idx, is_moe_layer=True)
         return infer_to_train
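
The two loops above split the network into a dense prefix and a MoE suffix. A standalone sketch of that partitioning, with assumed values in place of the config reads:

# Standalone sketch of the layer partitioning above; values are assumed.
moe_layer_start_index = 1
num_hidden_layers = 4

layers = []

def _add_layer_mappings(layer_idx, is_moe_layer):
    # Stand-in for the real mapping builder.
    layers.append((layer_idx, "moe" if is_moe_layer else "dense"))

for layer_idx in range(moe_layer_start_index):                      # dense prefix
    _add_layer_mappings(layer_idx, is_moe_layer=False)
for layer_idx in range(moe_layer_start_index, num_hidden_layers):   # MoE suffix
    _add_layer_mappings(layer_idx, is_moe_layer=True)

print(layers)  # [(0, 'dense'), (1, 'moe'), (2, 'moe'), (3, 'moe')]

The Qwen2 and Qwen3-MoE mappings below apply the same num_layers to num_hidden_layers rename.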
@@ -278,7 +278,7 @@ class Qwen2ForCausalLMRL(Qwen2ForCausalLM):
                 f"{train_base_name}.{layer_idx}.mlp.down_proj.{ph}"
         for layer_idx in range(
-                self.fd_config.model_config.num_layers):
+                self.fd_config.model_config.num_hidden_layers):
             _add_layer_mappings(layer_idx)
         return infer_to_train
@@ -396,7 +396,7 @@ class Qwen3MoeForCausalLMRL(Qwen3MoeForCausalLM):
         )
         # Process MoE layers
-        for layer_idx in range(self.fd_config.model_config.num_layers):
+        for layer_idx in range(self.fd_config.model_config.num_hidden_layers):
             _add_layer_mappings(layer_idx, is_moe_layer=True)
         return infer_to_train