[Features] support hugging face qwen3 dense and qwen2 model (#3574)

* support qwen2 and qwen3 hugging face

* fix moe

* defualt_v1 loader

* hugging_face_format deprecated

* modify hugging_face_foramt to model_format

* model_format auto

* fix environemt

* fix bug

* fix qwen3-0.6 bug

* model_format is str

* fix
This commit is contained in:
lizexu123
2025-08-26 10:54:53 +08:00
committed by GitHub
parent 66c5addce4
commit c43a4bec00
10 changed files with 182 additions and 11 deletions

View File

@@ -334,6 +334,10 @@ class Qwen2ForCausalLM(ModelForCasualLM):
params_dict = dict(self.named_parameters())
process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers()))
for loaded_weight_name, loaded_weight in weights_iterator:
model_format = self.fd_config.model_config.model_format
# Because the prefix for Paddle is qwen2, and for Hugging Face it is model.
if model_format == "torch":
loaded_weight_name = loaded_weight_name.replace("model", "qwen2")
for param_name, weight_name, shard_id in stacked_params_mapping:
if weight_name not in loaded_weight_name:
continue