Mirror of https://github.com/PaddlePaddle/FastDeploy.git
Synced 2025-11-03 02:53:26 +08:00
[Sync] Update to latest code (#2679)
* [Sync] Update to latest code
* Add new code files
* Add new code files
* update code
* Try to fix build.sh
* Try to fix build.sh
* Update code
* Update requirements.txt
* Update code

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
@@ -41,16 +41,12 @@ class XPUWeightOnlyLinearMethod(WeightOnlyLinearMethod):
         """
         Create weights for linear layer on XPU
         """
+        # The scale shape should be equal to the output dim of weight using Per-Channel Quantization.
+        linear_weight_scale_shape = [layer.linear_weight_shape[1]]
         layer.linear_weight_shape.reverse()
         if self.quant_config.name() == "weight_only_int4":
             layer.linear_weight_shape[0] //= 2
         layer.weight_dtype = "int8"
-        linear_weight_scale_shape = [layer.embed_dim]
-        if hasattr(layer, "linear_weight_shape"):
-            if isinstance(layer.linear_weight_shape, list):
-                layer_weight_shape = layer.linear_weight_shape
-                linear_weight_scale_shape = layer_weight_shape[:1]
-
         layer.linear_weight_scale = layer.create_parameter(
             shape=linear_weight_scale_shape,
             dtype="float32",
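Note on the change above: with per-channel quantization, each output channel (each column of the weight) carries its own scale, so the scale vector's length must equal the weight's output dim. That is why the new code sizes it as [layer.linear_weight_shape[1]] instead of falling back to layer.embed_dim. A minimal NumPy sketch of the idea (an illustration of the general technique, not FastDeploy's XPU kernel; quantize_per_channel_int8 is a hypothetical helper):

# Per-channel weight-only int8 quantization: one scale per output column.
import numpy as np

def quantize_per_channel_int8(weight):
    """Quantize a [in_dim, out_dim] float weight with one int8 scale per output channel."""
    # Scale shape is [out_dim], matching linear_weight_scale_shape = [layer.linear_weight_shape[1]].
    scale = np.maximum(np.abs(weight).max(axis=0), 1e-8) / 127.0
    qweight = np.clip(np.round(weight / scale), -127, 127).astype(np.int8)
    return qweight, scale.astype(np.float32)

in_dim, out_dim = 128, 64
w = np.random.randn(in_dim, out_dim).astype(np.float32)
qw, scale = quantize_per_channel_int8(w)
assert scale.shape == (out_dim,)               # scale length equals the weight's output dim
w_hat = qw.astype(np.float32) * scale          # dequantize; scale broadcasts over rows
assert np.abs(w - w_hat).max() <= scale.max()  # error bounded by one quantization step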
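The layer.linear_weight_shape[0] //= 2 line reflects int4 packing: two 4-bit values share each stored int8 byte, so after the shape is reversed the leading dim is halved. A sketch under an assumed layout (pairing adjacent rows of the transposed weight; the real XPU kernel may pack differently):

# int4 packing: two 4-bit values per int8 byte halve one dimension of the stored tensor.
import numpy as np

rng = np.random.default_rng(0)
in_dim, out_dim = 128, 64
w = rng.standard_normal((in_dim, out_dim)).astype(np.float32)

# Symmetric int4 range [-8, 7], one scale per output column as above.
scale = np.maximum(np.abs(w).max(axis=0), 1e-8) / 7.0
q4 = np.clip(np.round(w / scale), -8, 7).astype(np.int8)

q4t = q4.T                                     # mirrors layer.linear_weight_shape.reverse(): [out_dim, in_dim]
low = q4t[0::2, :].astype(np.uint8) & 0x0F     # keep each value's low 4 bits
high = q4t[1::2, :].astype(np.uint8) & 0x0F
packed = ((high << 4) | low).astype(np.int8)   # two int4 values per stored byte
assert packed.shape == (out_dim // 2, in_dim)  # leading dim halved, as in the diff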