[Feature] support qwen3-embedding model load (#4202)

* support qwen3-embedding

* fix ci bug

* fix

* fix ci bug

* fix ci bug

* fix
Author: lizexu123
Date: 2025-09-23 15:14:35 +08:00
Committed by: GitHub
Parent: 9082f625ba
Commit: c96a535a5d
5 changed files with 315 additions and 63 deletions


@@ -22,6 +22,10 @@ from paddle import nn
from paddle.distributed import fleet
from fastdeploy.config import FDConfig
from fastdeploy.model_executor.layers.utils import (
    DEFAULT_VOCAB_PADDING_SIZE,
    pad_vocab_size,
)
from fastdeploy.model_executor.utils import (
    default_weight_loader,
    set_weight_attrs,
@@ -44,6 +48,7 @@ class ParallelLMHead(nn.Layer):
        prefix: str = "",
        with_bias: bool = False,
        dtype: str = None,
        padding_size: int = DEFAULT_VOCAB_PADDING_SIZE,
    ) -> None:
        """
        Parallelized LMhead.
@@ -68,6 +73,10 @@ class ParallelLMHead(nn.Layer):
        self.column_cut = True
        self.nranks = fd_config.parallel_config.tensor_parallel_size
        self.fd_config = fd_config
        self.padding_size = padding_size
        if num_embeddings % self.nranks != 0:
            num_embeddings = pad_vocab_size(num_embeddings, self.padding_size)
        ColumnParallelLinear = fleet.meta_parallel.ColumnParallelLinear
        RowParallelLinear = fleet.meta_parallel.RowParallelLinear
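
The padding step exists so that a vocabulary size that is not divisible by the tensor-parallel degree can still be sharded evenly across ranks before the column-parallel LM head is built. Below is a minimal sketch of the arithmetic this relies on. The pad_vocab_size defined here is a local stand-in written for illustration (assuming it rounds the vocabulary up to the next multiple of padding_size, with 64 as an assumed default), not the FastDeploy implementation, and the vocabulary size and rank count are hypothetical.

    # Illustration only: a stand-in that rounds a vocab size up to the next
    # multiple of pad_to, which is what the diff above appears to rely on.
    def pad_vocab_size(vocab_size: int, pad_to: int = 64) -> int:
        return ((vocab_size + pad_to - 1) // pad_to) * pad_to


    if __name__ == "__main__":
        num_embeddings = 151_669  # hypothetical vocab size, not evenly divisible
        nranks = 8                # hypothetical tensor_parallel_size

        if num_embeddings % nranks != 0:
            num_embeddings = pad_vocab_size(num_embeddings, pad_to=64)

        # 151_669 is rounded up to 151_680, which splits into 18_960 columns per rank.
        assert num_embeddings % nranks == 0
        print(num_embeddings, num_embeddings // nranks)

With the padded size, each rank's shard of the output projection has the same width, and the extra padded logits can simply be ignored (or masked) at decode or pooling time.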