Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 00:57:33 +08:00)
[Feature] support qwen3-embedding model load (#4202)
* support qwen3-embedding
* fix ci bug
* fix
* fix ci bug
* fix ci bug
* fix
@@ -22,6 +22,10 @@ from paddle import nn
 from paddle.distributed import fleet
 
 from fastdeploy.config import FDConfig
+from fastdeploy.model_executor.layers.utils import (
+    DEFAULT_VOCAB_PADDING_SIZE,
+    pad_vocab_size,
+)
 from fastdeploy.model_executor.utils import (
     default_weight_loader,
     set_weight_attrs,
@@ -44,6 +48,7 @@ class ParallelLMHead(nn.Layer):
         prefix: str = "",
         with_bias: bool = False,
         dtype: str = None,
+        padding_size: int = DEFAULT_VOCAB_PADDING_SIZE,
     ) -> None:
         """
         Parallelized LMhead.
@@ -68,6 +73,10 @@ class ParallelLMHead(nn.Layer):
         self.column_cut = True
         self.nranks = fd_config.parallel_config.tensor_parallel_size
         self.fd_config = fd_config
+        self.padding_size = padding_size
+
+        if num_embeddings % self.nranks != 0:
+            num_embeddings = pad_vocab_size(num_embeddings, self.padding_size)
 
         ColumnParallelLinear = fleet.meta_parallel.ColumnParallelLinear
         RowParallelLinear = fleet.meta_parallel.RowParallelLinear
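For context on the new branch: the parallel linear layers shard the vocabulary dimension across tensor-parallel ranks, so num_embeddings must divide evenly by nranks. Below is a minimal sketch of the arithmetic, assuming pad_vocab_size rounds the vocabulary up to the nearest multiple of padding_size; the 64 default and the sample vocabulary size are illustrative assumptions, not values taken from this commit.

# Minimal sketch of the vocab-padding arithmetic used in the diff above.
# Assumptions (not taken from this commit): pad_vocab_size rounds up to the
# nearest multiple of padding_size, and DEFAULT_VOCAB_PADDING_SIZE is 64.
DEFAULT_VOCAB_PADDING_SIZE = 64


def pad_vocab_size(vocab_size: int, padding_size: int = DEFAULT_VOCAB_PADDING_SIZE) -> int:
    """Round vocab_size up to the nearest multiple of padding_size."""
    return ((vocab_size + padding_size - 1) // padding_size) * padding_size


nranks = 8                # hypothetical tensor_parallel_size
num_embeddings = 151_669  # hypothetical vocab size, not divisible by 8

if num_embeddings % nranks != 0:
    num_embeddings = pad_vocab_size(num_embeddings)

print(num_embeddings)           # 151680
print(num_embeddings % nranks)  # 0 -> each rank holds an equal 18960-row shard

The padded rows correspond to token IDs the tokenizer never emits; they exist only so that every rank's shard of the LM head has an equal, kernel-friendly size.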
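A hypothetical call site for the extended constructor follows. Only prefix, with_bias, dtype, padding_size, num_embeddings, and fd_config are visible in the hunks above; the remaining parameter names and values are assumptions for illustration.

# Hypothetical usage sketch; parameter names before `prefix` are assumed,
# as only the trailing constructor parameters appear in the diff.
lm_head = ParallelLMHead(
    fd_config,                               # FDConfig; stored as self.fd_config above
    num_embeddings=model_config.vocab_size,  # padded internally when not divisible by nranks
    embedding_dim=model_config.hidden_size,  # assumed parameter name
    prefix="lm_head",
    with_bias=False,
    dtype="bfloat16",
    padding_size=DEFAULT_VOCAB_PADDING_SIZE,  # new parameter added by this commit
)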