mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-27 04:46:16 +08:00
[XPU] Supports BF16 for ERNIE-4.5-21B-A3B and ERNIE-4.5-0.3B (#2765)
* fix no-quant XPU MoE
* change dir of XPU MoE weight-only
This commit is contained in:
@@ -60,8 +60,10 @@ class WeightOnlyConfig(QuantConfigBase):
|
||||
|
||||
def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
|
||||
if current_platform.is_xpu():
|
||||
from fastdeploy.model_executor.layers.backends import (
|
||||
XPUWeightOnlyLinearMethod, XPUWeightOnlyMoEMethod)
|
||||
from fastdeploy.model_executor.layers.backends import \
|
||||
XPUWeightOnlyLinearMethod
|
||||
from fastdeploy.model_executor.layers.moe.fused_moe_xpu_backend import \
|
||||
XPUWeightOnlyMoEMethod
|
||||
if isinstance(layer, FusedMoE):
|
||||
return XPUWeightOnlyMoEMethod(self)
|
||||
else:
|
||||
|
Reference in New Issue
Block a user