Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 00:57:33 +08:00)
[Feature] support qwen3-embedding model load (#4202)
* support qwen3-embedding
* fix ci bug
* fix
* fix ci bug
* fix ci bug
* fix
@@ -45,6 +45,14 @@ if cache_params != "none":
    c8_state_dict = paddle.load(cache_params, return_numpy=True)


DEFAULT_VOCAB_PADDING_SIZE = 64


def pad_vocab_size(vocab_size: int, pad_to: int = DEFAULT_VOCAB_PADDING_SIZE) -> int:
    """Pad the vocab size to the given value."""
    return ((vocab_size + pad_to - 1) // pad_to) * pad_to


def per_block_cast_to_fp8(x: Tensor, block_size: list = [128, 128]) -> Tuple[Tensor, Tensor]:
    """
    Only used in deep_gemm block wise quant weight.
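A minimal self-contained sketch of the rounding performed by the pad_vocab_size helper added above; the vocabulary size below is an arbitrary illustrative value, not taken from any model config.

# Reproduces the rounding formula from pad_vocab_size (illustrative values only).
vocab_size = 151_000
pad_to = 64  # DEFAULT_VOCAB_PADDING_SIZE
padded = ((vocab_size + pad_to - 1) // pad_to) * pad_to  # round up to a multiple of pad_to
assert padded == 151_040 and padded % pad_to == 0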
@@ -372,3 +380,14 @@ def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str])
        paddle.Tensor: An empty tensor with the specified shape and data type.
    """
    return paddle.empty(list(shape), dtype=dtype)


def vocab_range_from_per_partition_vocab_size(per_partition_vocab_size: int, rank: int, offset: int = 0):
    index_f = rank * per_partition_vocab_size
    index_l = index_f + per_partition_vocab_size
    return index_f + offset, index_l + offset


def vocab_range_from_global_vocab_size(global_vocab_size: int, rank: int, world_size: int, offset: int = 0):
    per_partition_vocab_size = divide(global_vocab_size, world_size)
    return vocab_range_from_per_partition_vocab_size(per_partition_vocab_size, rank, offset=offset)
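A minimal self-contained sketch of the partitioning that the two new range helpers implement, assuming divide() performs exact integer division of a (padded) vocabulary by the tensor-parallel world size; the vocabulary size and world size below are illustrative assumptions, not taken from any model config.

# Mirrors what vocab_range_from_global_vocab_size computes for a padded vocabulary
# of 151_040 tokens split evenly across 4 hypothetical tensor-parallel ranks.
global_vocab_size, world_size, offset = 151_040, 4, 0
per_partition = global_vocab_size // world_size        # divide() is assumed to require an exact split
for rank in range(world_size):
    index_f = rank * per_partition                     # first token index owned by this rank
    index_l = index_f + per_partition                  # one past the last owned index
    print(rank, (index_f + offset, index_l + offset))  # (0, 37760), (37760, 75520), ...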