[Features] support hugging face qwen3 dense and qwen2 model (#3574)

* support qwen2 and qwen3 hugging face

* fix moe

* default_v1 loader

* hugging_face_format deprecated

* modify hugging_face_format to model_format

* model_format auto

* fix environment

* fix bug

* fix qwen3-0.6 bug

* model_format is str

* fix
This commit is contained in:
lizexu123
2025-08-26 10:54:53 +08:00
committed by GitHub
parent 66c5addce4
commit c43a4bec00
10 changed files with 182 additions and 11 deletions

View File

@@ -16,6 +16,8 @@
from typing import Any, Optional, Union
import paddle
from fastdeploy.config import FDConfig
from fastdeploy.model_executor.layers.utils import get_tensor
@@ -155,10 +157,16 @@ def default_weight_loader(fd_config: FDConfig) -> None:
def fn(param, loaded_weight, shard_id: Optional[Union[int, str]] = None):
"""fn"""
output_dim = getattr(param, "output_dim", None)
model_format = getattr(param, "model_format", "")
if model_format == "torch":
loaded_weight = loaded_weight.transpose([1, 0])
# Tensor parallelism splits the weight along the output_dim
if output_dim is not None and fd_config.parallel_config.tensor_parallel_size > 1:
dim = -1 if output_dim else 0
size = loaded_weight.get_shape()[dim]
if isinstance(loaded_weight, paddle.Tensor):
size = loaded_weight.shape[dim]
else:
size = loaded_weight.get_shape()[dim]
block_size = size // fd_config.parallel_config.tensor_parallel_size
shard_offset = fd_config.parallel_config.tensor_parallel_rank * block_size
shard_size = (fd_config.parallel_config.tensor_parallel_rank + 1) * block_size