[Features] support Hugging Face Qwen3 dense and Qwen2 models (#3574)

* support Qwen2 and Qwen3 Hugging Face models

* fix moe

* default_v1 loader

* hugging_face_format deprecated

* modify hugging_face_format to model_format

* model_format auto

* fix environment

* fix bug

* fix qwen3-0.6 bug

* model_format is str

* fix
Author: lizexu123
Date: 2025-08-26 10:54:53 +08:00
Committed by: GitHub
Parent: 66c5addce4
Commit: c43a4bec00
10 changed files with 182 additions and 11 deletions
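
The commit messages above replace the deprecated hugging_face_format flag with a model_format string that can also be set to "auto". As a rough illustration of what such auto-detection could look like (the detect_model_format and resolve_model_format helpers, the format-name strings, and the file heuristics below are assumptions made for this sketch, not FastDeploy's actual logic):

import os


def detect_model_format(model_dir: str) -> str:
    # Hypothetical heuristic: Hugging Face exports ship config.json plus
    # *.safetensors shards, while Paddle-native exports ship *.pdparams.
    files = os.listdir(model_dir)
    has_hf_config = "config.json" in files
    has_safetensors = any(f.endswith(".safetensors") for f in files)
    has_pdparams = any(f.endswith(".pdparams") for f in files)
    if has_pdparams:
        return "paddle"
    if has_hf_config and has_safetensors:
        return "hf"
    raise ValueError(f"cannot infer model_format for {model_dir}")


def resolve_model_format(model_format: str, model_dir: str) -> str:
    # "auto" defers to the directory heuristic; an explicit value wins.
    return detect_model_format(model_dir) if model_format == "auto" else model_format

Resolving "auto" to a concrete string once keeps the rest of the loader branching on a plain value, which lines up with the "model_format is str" commit above.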

@@ -22,7 +22,7 @@ from paddle import nn
 from paddle.distributed import fleet
 from fastdeploy.config import FDConfig
-from fastdeploy.model_executor.utils import set_weight_attrs
+from fastdeploy.model_executor.utils import default_weight_loader, set_weight_attrs
 from .utils import get_tensor
@@ -61,6 +61,7 @@ class ParallelLMHead(nn.Layer):
         self.use_ep: bool = fd_config.parallel_config.use_ep
         self.column_cut = True
         self.nranks = fd_config.parallel_config.tensor_parallel_size
+        self.fd_config = fd_config
         ColumnParallelLinear = fleet.meta_parallel.ColumnParallelLinear
         RowParallelLinear = fleet.meta_parallel.RowParallelLinear
@@ -90,7 +91,14 @@ class ParallelLMHead(nn.Layer):
                 weight_attr=None,
                 has_bias=True if self.bias_key is not None else False,
                 gather_output=need_gather,
-                fuse_matmul_bias=False,  # False gives a smaller diff
+                fuse_matmul_bias=False,
             )
+            set_weight_attrs(
+                self.linear.weight,
+                {
+                    "weight_loader": default_weight_loader(self.fd_config),
+                    "model_format": self.fd_config.model_config.model_format,
+                },
+            )
             if self.nranks > 1:
                 set_weight_attrs(self.linear.weight, {"output_dim": True})
@@ -102,8 +110,16 @@ class ParallelLMHead(nn.Layer):
                 weight_attr=None,
                 has_bias=True if self.bias_key is not None else False,
                 input_is_parallel=False,
-                fuse_matmul_bias=False,  # False gives a smaller diff
+                fuse_matmul_bias=False,
             )
+            set_weight_attrs(
+                self.linear.weight,
+                {
+                    "weight_loader": default_weight_loader(self.fd_config),
+                    "model_format": self.fd_config.model_config.model_format,
+                },
+            )
             if self.nranks > 1:
                 set_weight_attrs(self.linear.weight, {"output_dim": False})
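
Both hunks tag the lm_head weight with a weight_loader callable and the configured model_format via set_weight_attrs. A minimal, self-contained sketch of how a checkpoint loop might consume those attributes follows; the ToyParam class, the NumPy-based loader, and the transpose-for-HF rule are assumptions for illustration only (in the diff, default_weight_loader takes the FDConfig rather than a bare format string):

import numpy as np


class ToyParam:
    """Stand-in for a framework parameter that can carry extra attributes."""

    def __init__(self, shape):
        self.data = np.zeros(shape, dtype=np.float32)


def set_weight_attrs(param, attrs):
    # Assumed behaviour: record the attributes directly on the parameter object.
    for key, value in attrs.items():
        setattr(param, key, value)


def default_weight_loader(model_format):
    # Returns a loader that copies a checkpoint tensor into the parameter.
    # The transpose-for-HF rule below is an assumption of this sketch.
    def load(param, loaded_weight):
        loaded = np.asarray(loaded_weight, dtype=param.data.dtype)
        if model_format == "hf" and loaded.shape == param.data.shape[::-1]:
            loaded = loaded.T  # HF linears store [out, in]; Paddle expects [in, out]
        assert loaded.shape == param.data.shape, "shape mismatch"
        param.data[...] = loaded
    return load


# Usage mirroring the diff: tag the weight once, then let a generic
# checkpoint loop dispatch to whatever loader each parameter carries.
lm_head_weight = ToyParam((8, 4))
set_weight_attrs(lm_head_weight, {
    "weight_loader": default_weight_loader("hf"),
    "model_format": "hf",
})

checkpoint = {"lm_head.weight": np.ones((4, 8))}  # HF-style [out, in] layout
params = {"lm_head.weight": lm_head_weight}
for name, tensor in checkpoint.items():
    params[name].weight_loader(params[name], tensor)

The point of the pattern is that the generic loading loop needs no model-specific branches: each parameter carries its own loader, and per-format details such as weight layout stay inside that loader.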