Mirror of https://github.com/PaddlePaddle/FastDeploy.git
Sync v2.0 version of code to github repo
@@ -16,11 +16,13 @@
 !! This file will be deleted after the platform is fully functional
 """
 
+from typing import Tuple
+
 import numpy as np
 import paddle
 
 
-def xpu_clip_and_round(x):
+def xpu_clip_and_round(x: np.ndarray) -> np.ndarray:
     """
     Clip and round the input array to the range [-127, 127] and convert to int8.
 
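The signature change above only adds type annotations; the behaviour described in the docstring is unchanged. As a quick illustration of the clip-and-round semantics, using the one-line implementation visible in the next hunk, a sample float32 array maps to int8 as follows:

import numpy as np

x = np.array([-200.4, -3.6, 0.49, 130.2], dtype="float32")
# Round to the nearest integer, bound to [-127, 127], then narrow to int8.
result = np.clip(np.around(x), -127, 127).astype("int8")
# result == array([-127, -4, 0, 127], dtype=int8)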
@@ -33,7 +35,8 @@ def xpu_clip_and_round(x):
     return np.clip(np.around(x), -127, 127).astype("int8")
 
 
-def xpu_quant_qkv_weight(weight_np):
+def xpu_quant_qkv_weight(
+        weight_np: np.ndarray) -> Tuple[paddle.Tensor, paddle.Tensor]:
     """
     Quantize the query, key, and value weights for the Transformer model.
 
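The body of xpu_quant_qkv_weight lies outside this hunk, so only its signature and return pair are visible here. A minimal sketch of the per-channel symmetric int8 scheme that the (quanted_weight, weight_scales) return type suggests; every internal detail below is an assumption for illustration, not the repository's implementation:

import numpy as np
import paddle

def quant_weight_sketch(weight_np: np.ndarray):
    # Assumed: one max-abs scale per output channel (symmetric quantization).
    weight_scales = np.max(np.abs(weight_np), axis=0)
    safe_scales = np.where(weight_scales == 0, 1.0, weight_scales)  # avoid division by zero
    # Scale into [-127, 127], then round and clip exactly as xpu_clip_and_round does.
    quanted_weight = np.clip(np.around(weight_np / safe_scales * 127.0), -127, 127).astype("int8")
    return paddle.to_tensor(quanted_weight), paddle.to_tensor(weight_scales)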
@@ -61,7 +64,8 @@ def xpu_quant_qkv_weight(weight_np):
     return quanted_weight, weight_scales
 
 
-def xpu_quant_weight(weight_np):
+def xpu_quant_weight(
+        weight_np: np.ndarray) -> Tuple[paddle.Tensor, paddle.Tensor]:
     """
     Quantize the weight tensor for XPU devices.
 
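Assuming xpu_quant_weight follows the same pattern as the QKV variant, a caller would use it roughly as below; the shape is illustrative, not taken from the source:

weight = np.random.randn(1024, 1024).astype("float32")
quanted_weight, weight_scales = xpu_quant_weight(weight)
# quanted_weight: int8 paddle.Tensor; weight_scales: per-channel float scales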