dcu adapter ernie45t (#2756)

Co-authored-by: lifu <lifu@sugon.com>
Co-authored-by: yongqiangma <xing.wo@163.com>
This commit is contained in:
lifulll
2025-07-09 18:56:27 +08:00
committed by GitHub
parent 03a74995b8
commit 1f28bdf994
30 changed files with 1133 additions and 41 deletions

View File

@@ -75,6 +75,15 @@ class WeightOnlyConfig(QuantConfigBase):
return GCUWeightOnlyMoEMethod(self)
else:
return GCUWeightOnlyLinearMethod(self)
elif current_platform.is_dcu():
if isinstance(layer, FusedMoE):
from fastdeploy.model_executor.layers.backends import (
DCUTritonWeightOnlyMoEMethod)
return DCUTritonWeightOnlyMoEMethod(self)
else:
from fastdeploy.model_executor.layers.backends import (
DCUWeightOnlyLinearMethod)
return DCUWeightOnlyLinearMethod(self)
else:
if isinstance(layer, FusedMoE):
if layer.use_method == "cutlass":