Simplify the Config code (#2770)

* simplify the code

* fix vl

* delete config

* fix

* perfect code

* fix ci

* fix xpu

* fix xpu

* fix server

* resolve conflict

* fix mtp

* resolve conflict

* fix xpu

* fix xpu

* fix vl

* fix log

* fix qwen moe

* fix qwen moe

* fix qwen moe

Authored by YuanRisheng on 2025-07-14 19:50:05 +08:00, committed by GitHub
parent 2e81792d64
commit 4c7b8bc458
34 changed files with 551 additions and 911 deletions

@@ -43,7 +43,7 @@ def load_ep_checkpoint(model_path: str,
     filtered_map = {k: v for k, v in weight_list.items() if "experts" not in k}
     num_local_ffn_keys = []
-    for i in range(config.moe_layer_start_index, config.num_layers):
+    for i in range(config.moe_layer_start_index, config.num_hidden_layers):
         for j in range(
                 config.num_experts_start_offset,
                 config.num_experts_start_offset + config.num_experts_per_rank,
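
This hunk renames config.num_layers to config.num_hidden_layers where the expert-parallel loader enumerates the FFN keys owned by the local rank. A minimal sketch of that enumeration, assuming a dataclass-style config; only the four attribute names visible in the diff come from the source, the helper and its sample values are illustrative:

from dataclasses import dataclass

@dataclass
class MoEConfig:
    num_hidden_layers: int        # renamed from num_layers in this PR
    moe_layer_start_index: int
    num_experts_start_offset: int
    num_experts_per_rank: int

def local_expert_keys(config: MoEConfig) -> list:
    # (layer, expert) index pairs owned by this expert-parallel rank
    return [
        (i, j)
        for i in range(config.moe_layer_start_index, config.num_hidden_layers)
        for j in range(config.num_experts_start_offset,
                       config.num_experts_start_offset + config.num_experts_per_rank)
    ]

# Hypothetical values: 4 hidden layers, MoE starting at layer 2,
# this rank owning experts 4 and 5.
print(local_expert_keys(MoEConfig(num_hidden_layers=4,
                                  moe_layer_start_index=2,
                                  num_experts_start_offset=4,
                                  num_experts_per_rank=2)))
# -> [(2, 4), (2, 5), (3, 4), (3, 5)]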
@@ -261,7 +261,7 @@ def load_composite_checkpoint(
             and os.path.isdir(os.path.join(model_path, f))
         ]
         if len(rank_dirs) > 1:
-            if fd_config.parallel_config.tensor_parallel_degree != len(
+            if fd_config.parallel_config.tensor_parallel_size != len(
                     rank_dirs):
                 raise ValueError(
                     f"Your model only supports loading with tp{len(rank_dirs)}"
@@ -283,7 +283,7 @@ def load_composite_checkpoint(
     else:
         state_dict = load_tp_checkpoint(model_path,
                                         cls,
-                                        fd_config.model_config,
+                                        fd_config.model_config.pretrained_config,
                                         return_numpy=return_numpy)
     if not state_dict:
         raise ValueError("weight not found in state_dict !")