[XPU] Supports BF16 for ERNIE-4.5-21B-A3B and ERNIE-4.5-0.3B (#2765)

* Fix XPU MoE when no quantization is used
* Change the directory of the XPU weight-only MoE method
2025-09-27 04:46:16 +08:00 · 2025-07-09 15:57:51 +08:00
parent 771e71a24d
commit be21ef5047
5 changed files with 234 additions and 117 deletions
--- a/fastdeploy/model_executor/layers/quantization/weight_only.py
+++ b/fastdeploy/model_executor/layers/quantization/weight_only.py
@@ -60,8 +60,10 @@ class WeightOnlyConfig(QuantConfigBase):

    def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
        if current_platform.is_xpu():
-            from fastdeploy.model_executor.layers.backends import (
-                XPUWeightOnlyLinearMethod, XPUWeightOnlyMoEMethod)
+            from fastdeploy.model_executor.layers.backends import \
+                XPUWeightOnlyLinearMethod
+            from fastdeploy.model_executor.layers.moe.fused_moe_xpu_backend import \
+                XPUWeightOnlyMoEMethod
            if isinstance(layer, FusedMoE):
                return XPUWeightOnlyMoEMethod(self)
            else: