mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Features] Support Hugging Face Qwen3 dense and Qwen2 models (#3574)
* support qwen2 and qwen3 hugging face * fix moe * default_v1 loader * hugging_face_format deprecated * modify hugging_face_format to model_format * model_format auto * fix environment * fix bug * fix qwen3-0.6 bug * model_format is str * fix
This commit is contained in:
@@ -22,7 +22,7 @@ from paddle import nn
|
||||
from paddle.distributed import fleet
|
||||
|
||||
from fastdeploy.config import FDConfig
|
||||
from fastdeploy.model_executor.utils import set_weight_attrs
|
||||
from fastdeploy.model_executor.utils import default_weight_loader, set_weight_attrs
|
||||
|
||||
from .utils import get_tensor
|
||||
|
||||
@@ -61,6 +61,7 @@ class ParallelLMHead(nn.Layer):
|
||||
self.use_ep: bool = fd_config.parallel_config.use_ep
|
||||
self.column_cut = True
|
||||
self.nranks = fd_config.parallel_config.tensor_parallel_size
|
||||
self.fd_config = fd_config
|
||||
|
||||
ColumnParallelLinear = fleet.meta_parallel.ColumnParallelLinear
|
||||
RowParallelLinear = fleet.meta_parallel.RowParallelLinear
|
||||
@@ -90,7 +91,14 @@ class ParallelLMHead(nn.Layer):
|
||||
weight_attr=None,
|
||||
has_bias=True if self.bias_key is not None else False,
|
||||
gather_output=need_gather,
|
||||
fuse_matmul_bias=False, # False diff更小
|
||||
fuse_matmul_bias=False,
|
||||
)
|
||||
set_weight_attrs(
|
||||
self.linear.weight,
|
||||
{
|
||||
"weight_loader": default_weight_loader(self.fd_config),
|
||||
"model_format": self.fd_config.model_config.model_format,
|
||||
},
|
||||
)
|
||||
if self.nranks > 1:
|
||||
set_weight_attrs(self.linear.weight, {"output_dim": True})
|
||||
@@ -102,8 +110,16 @@ class ParallelLMHead(nn.Layer):
|
||||
weight_attr=None,
|
||||
has_bias=True if self.bias_key is not None else False,
|
||||
input_is_parallel=False,
|
||||
fuse_matmul_bias=False, # False diff更小
|
||||
fuse_matmul_bias=False,
|
||||
)
|
||||
set_weight_attrs(
|
||||
self.linear.weight,
|
||||
{
|
||||
"weight_loader": default_weight_loader(self.fd_config),
|
||||
"model_format": self.fd_config.model_config.model_format,
|
||||
},
|
||||
)
|
||||
|
||||
if self.nranks > 1:
|
||||
set_weight_attrs(self.linear.weight, {"output_dim": False})
|
||||
|
||||
|
Reference in New Issue
Block a user