mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
dcu adapter ernie45t (#2756)
Co-authored-by: lifu <lifu@sugon.com>
Co-authored-by: yongqiangma <xing.wo@163.com>
This commit is contained in:
@@ -75,6 +75,15 @@ class WeightOnlyConfig(QuantConfigBase):
|
||||
return GCUWeightOnlyMoEMethod(self)
|
||||
else:
|
||||
return GCUWeightOnlyLinearMethod(self)
|
||||
elif current_platform.is_dcu():
|
||||
if isinstance(layer, FusedMoE):
|
||||
from fastdeploy.model_executor.layers.backends import (
|
||||
DCUTritonWeightOnlyMoEMethod)
|
||||
return DCUTritonWeightOnlyMoEMethod(self)
|
||||
else:
|
||||
from fastdeploy.model_executor.layers.backends import (
|
||||
DCUWeightOnlyLinearMethod)
|
||||
return DCUWeightOnlyLinearMethod(self)
|
||||
else:
|
||||
if isinstance(layer, FusedMoE):
|
||||
if layer.use_method == "cutlass":
|
||||
|
Reference in New Issue
Block a user