[V1 Loader] Support Ernie text (MoE and dense) (#3110)

* new loader support 0.3B

* fix weight

* support parallel load

* support parallel load

* fix slice

* support moe

* delete code

* polish code

* polish code
This commit is contained in:
YuanRisheng
2025-08-14 20:25:28 +08:00
committed by GitHub
parent ab60292f89
commit 09c979f3dd
6 changed files with 218 additions and 85 deletions

View File

@@ -647,12 +647,12 @@ class DeepseekV3ForCausalLM(ModelForCasualLM):
]
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping = FusedMoE.make_expert_params_mapping(
num_experts=self.fd_config.model_config.n_routed_experts,
ckpt_gate_proj_name="gate_proj",
ckpt_down_proj_name="down_proj",
ckpt_up_proj_name="up_proj",
param_gate_up_proj_name="experts.up_gate_proj_",
param_down_proj_name="experts.down_proj_",
num_experts=self.fd_config.model_config.n_routed_experts,
)
params_dict = dict(self.named_parameters())