[Features] support hugging face qwen3 dense and qwen2 model (#3574)

* support qwen2 and qwen3 hugging face

* fix moe

* default_v1 loader

* hugging_face_format deprecated

* modify hugging_face_format to model_format

* model_format auto

* fix environment

* fix bug

* fix qwen3-0.6 bug

* model_format is str

* fix
This commit is contained in:
lizexu123
2025-08-26 10:54:53 +08:00
committed by GitHub
parent 66c5addce4
commit c43a4bec00
10 changed files with 182 additions and 11 deletions

View File

@@ -219,7 +219,7 @@ class FusedMoE(nn.Layer):
def _load_gate_up_weight(self, param, expert_id, loaded_weight, shard_id, shard_dim=None):
dim = -1 if shard_dim else 0
if self.tp_size > 1:
if isinstance(loaded_weight, np.ndarray):
if isinstance(loaded_weight, (np.ndarray, paddle.Tensor)):
size = loaded_weight.shape[dim]
else:
size = loaded_weight.get_shape()[dim]
@@ -259,7 +259,7 @@ class FusedMoE(nn.Layer):
def _load_down_weight(self, param, expert_id, loaded_weight, shard_id, shard_dim=None):
if self.tp_size > 1 and shard_dim is not None:
dim = -1 if shard_dim else 0
if isinstance(loaded_weight, np.ndarray):
if isinstance(loaded_weight, (np.ndarray, paddle.Tensor)):
size = loaded_weight.shape[dim]
else:
size = loaded_weight.get_shape()[dim]