[MTP] Support MTP reshard (#4099)

* Support RL reshard

* Modify model name
This commit is contained in:
freeliuzc
2025-09-15 17:13:53 +08:00
committed by GitHub
parent 46911f903d
commit 69aa2781a1
3 changed files with 4 additions and 0 deletions

View File

@@ -134,6 +134,7 @@ class ModelConfig:
self.lm_head_fp32: bool = False
self.model_format = "auto"
self.partial_rotary_factor: float = 1.0
self.num_nextn_predict_layers = 0
for key, value in args.items():
if hasattr(self, key) and value != "None":
setattr(self, key, value)

View File

@@ -24,6 +24,7 @@ class MultimodalRegistry:
"Ernie4_5_VLMoeForConditionalGeneration",
"Ernie5MoeForCausalLM",
"Qwen2_5_VLForConditionalGeneration",
"Ernie5ForCausalLM",
}
@classmethod

View File

@@ -63,6 +63,7 @@ class RolloutModelConfig:
local_rank: int = 0,
moba_attention_config: str = None,
data_parallel_size: int = 1,
num_nextn_predict_layers: int = 0,
):
# Required parameters
self.model = model_name_or_path
@@ -109,6 +110,7 @@ class RolloutModelConfig:
self.early_stop_config = early_stop_config
self.ips = None
self.moba_attention_config = moba_attention_config
self.num_nextn_predict_layers = num_nextn_predict_layers
def __str__(self):
return "\n".join(f"{k}: {v}" for k, v in self.__dict__.items())