[XPU] Supports BF16 for ERNIE-4.5-21B-A3B and ERNIE-4.5-0.3B (#2765)

* Fix the XPU MoE path when no quantization is used

* Move the XPU weight-only MoE method to a different module directory
This commit is contained in:
yulangz
2025-07-09 15:57:51 +08:00
committed by GitHub
parent 771e71a24d
commit be21ef5047
5 changed files with 234 additions and 117 deletions

View File

@@ -60,8 +60,10 @@ class WeightOnlyConfig(QuantConfigBase):
def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
if current_platform.is_xpu():
from fastdeploy.model_executor.layers.backends import (
XPUWeightOnlyLinearMethod, XPUWeightOnlyMoEMethod)
from fastdeploy.model_executor.layers.backends import \
XPUWeightOnlyLinearMethod
from fastdeploy.model_executor.layers.moe.fused_moe_xpu_backend import \
XPUWeightOnlyMoEMethod
if isinstance(layer, FusedMoE):
return XPUWeightOnlyMoEMethod(self)
else: