[FIX] Fix Machete compile via ENABLE_MACHETE (#3727)

* add ENABLE_MACHETE

* fix

* revert

* update

* pre_commit

* fix

* fix

---------

Co-authored-by: Ayakouji <yuhongh@qq.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Co-authored-by: aquagull <hongyuh@qq.com>
This commit is contained in:
Sunny-bot1
2025-08-30 17:50:17 +08:00
committed by GitHub
parent b9af95cf1c
commit fe5d09f9ee
4 changed files with 17 additions and 9 deletions

View File

@@ -26,8 +26,14 @@ def get_sm_version():
return cc
# True only when the Machete GPU ops were imported successfully; stays False on
# non-CUDA platforms, on devices whose SM version is not 90, or when the import
# fails.
_ENABLE_MACHETE = False
if current_platform.is_cuda() and get_sm_version() == 90:
    # The import is best-effort: the ops must only be imported inside this
    # guard, otherwise a failure would still propagate and break module import.
    try:
        from fastdeploy.model_executor.ops.gpu import machete_mm, machete_prepack_B
    except Exception:
        # Keep the fallback non-fatal, but leave a trace so a missing or broken
        # Machete build can be diagnosed instead of failing silently.
        import logging

        logging.getLogger(__name__).debug(
            "Machete ops could not be imported; Machete is disabled.",
            exc_info=True,
        )
    else:
        _ENABLE_MACHETE = True
def get_pack_factor(num_bits):

View File

@@ -34,12 +34,6 @@ from ..utils import get_tensor
from .quant_base import QuantConfigBase, QuantMethodBase
def get_sm_version():
    """Return the device's SM version encoded as ``major * 10 + minor``.

    The major and minor numbers are read from the properties object returned by
    ``paddle.device.cuda.get_device_properties()``.
    """
    device_props = paddle.device.cuda.get_device_properties()
    return device_props.major * 10 + device_props.minor
class WeightOnlyConfig(QuantConfigBase):
"""
Quantization config for weight only
@@ -139,10 +133,14 @@ class WeightOnlyConfig(QuantConfigBase):
else:
raise ValueError(f"Unsupported MOE backend {layer.use_method}")
else:
from fastdeploy.model_executor.layers.quantization.ops.machete_mm import (
_ENABLE_MACHETE,
)
if (
self.name() == "wint4"
and _ENABLE_MACHETE
and envs.FD_USE_MACHETE == "1"
and get_sm_version() == 90
and layer.weight_shape[1]
and layer.weight_shape[1] % 128 == 0
):