[Feature] support qwen3-embedding model load (#4202)

* support qwen3-embedding

* fix ci bug

* fix

* fix ci bug

* fix ci bug

* fix
lizexu123
2025-09-23 15:14:35 +08:00
committed by GitHub
parent 9082f625ba
commit c96a535a5d
5 changed files with 315 additions and 63 deletions


@@ -45,6 +45,14 @@ if cache_params != "none":
    c8_state_dict = paddle.load(cache_params, return_numpy=True)


DEFAULT_VOCAB_PADDING_SIZE = 64


def pad_vocab_size(vocab_size: int, pad_to: int = DEFAULT_VOCAB_PADDING_SIZE) -> int:
    """Pad the vocab size up to the nearest multiple of pad_to."""
    return ((vocab_size + pad_to - 1) // pad_to) * pad_to
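As a rough illustration (not part of the diff): a vocab size that is not already a multiple of the default padding of 64 is rounded up to the next multiple, while an aligned size is left unchanged.

# Hypothetical usage sketch of pad_vocab_size; the values below are examples, not from the commit.
assert pad_vocab_size(50257) == 50304      # 786 * 64, rounded up
assert pad_vocab_size(151936) == 151936    # already a multiple of 64, unchanged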
def per_block_cast_to_fp8(x: Tensor, block_size: list = [128, 128]) -> Tuple[Tensor, Tensor]:
    """
    Only used in deep_gemm block wise quant weight.
@@ -372,3 +380,14 @@ def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str])
        paddle.Tensor: An empty tensor with the specified shape and data type.
    """
    return paddle.empty(list(shape), dtype=dtype)


def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size: int, rank: int, offset: int = 0):
    """Return the half-open [first, last) vocab index range owned by rank, given a fixed per-partition size."""
    index_f = rank * per_partition_vocab_size
    index_l = index_f + per_partition_vocab_size
    return index_f + offset, index_l + offset


def vocab_range_from_global_vocab_size(global_vocab_size: int, rank: int, world_size: int, offset: int = 0):
    """Split the global vocab evenly across world_size ranks and return this rank's [first, last) range."""
    per_partition_vocab_size = divide(global_vocab_size, world_size)
    return vocab_range_from_per_partition_vocab_size(per_partition_vocab_size, rank, offset=offset)
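For context, a minimal usage sketch of how a padded vocab is split across tensor-parallel ranks, assuming divide is the module's exact integer-division helper (the numbers are illustrative, not from the commit):

# Hypothetical sketch: split a padded vocab across 4 ranks and take rank 1's slice.
padded = pad_vocab_size(50257)  # 50304
start, end = vocab_range_from_global_vocab_size(padded, rank=1, world_size=4)
# per-partition size is 50304 // 4 == 12576, so rank 1 owns the half-open range [12576, 25152)
assert (start, end) == (12576, 25152)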