【Sync develop】support vl model name_mapping and ori_vocab_size (#2915)

* support vl ori_vocab_size

* support trainer_degree in name_mapping

* fix
This commit is contained in:
gaoziyuan
2025-07-21 14:14:15 +08:00
committed by GitHub
parent f941124402
commit 4ead15822c
6 changed files with 167 additions and 134 deletions

View File

@@ -37,6 +37,25 @@ class MoEPhase(Enum):
PREFILL = 1
DECODER = 2
class ErnieArchitectures:
    """Helper class for ERNIE architecture checks."""

    # Known ERNIE model architecture names.
    ARCHITECTURES = {
        "Ernie4_5_ForCausalLM",
        "Ernie4_5_MoeForCausalLM",
        "Ernie4_5_VLMoeForConditionalGeneration",
    }

    @classmethod
    def contains_ernie_arch(cls, architectures):
        """Return True if at least one known ERNIE architecture appears in *architectures*."""
        for known_arch in cls.ARCHITECTURES:
            if known_arch in architectures:
                return True
        return False

    @classmethod
    def is_ernie_arch(cls, architecture):
        """Return True if *architecture* is one of the known ERNIE architectures."""
        return architecture in cls.ARCHITECTURES
PRETRAINED_INIT_CONFIGURATION = {
"rope_theta" : 10000.0,
"num_key_value_heads" : -1,
@@ -108,9 +127,10 @@ class ModelConfig:
self.vision_config = PretrainedConfig.from_dict(self.vision_config)
self.ori_vocab_size = self.vocab_size
if "Ernie4_5_ForCausalLM" in self.architectures or "Ernie4_5_MoeForCausalLM" in self.architectures:
if ErnieArchitectures.contains_ernie_arch(self.architectures):
self.ori_vocab_size = args["ori_vocab_size"]
class ParallelConfig:
"""Configuration for the distributed execution."""
def __init__(