mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[FIX]Fix Machete compile via ENABLE_MACHETE (#3727)
* add ENABLE_MACHETE
* fix
* revert
* update
* pre_commit
* fix
* fix

---------

Co-authored-by: Ayakouji <yuhongh@qq.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Co-authored-by: aquagull <hongyuh@qq.com>
This commit is contained in:
@@ -26,8 +26,14 @@ def get_sm_version():
|
||||
return cc
|
||||
|
||||
|
||||
# Whether the Machete custom ops could be imported. It stays False unless the
# platform check and the import below both succeed, so other modules can test
# this flag instead of importing the ops themselves.
_ENABLE_MACHETE = False
if current_platform.is_cuda() and get_sm_version() == 90:
    # The ops are imported only inside the try block: an additional unguarded
    # import ahead of it would raise before the fallback could take effect.
    try:
        from fastdeploy.model_executor.ops.gpu import machete_mm, machete_prepack_B

        _ENABLE_MACHETE = True
    except Exception:
        # Deliberate best-effort: any failure to load the ops leaves the flag
        # False. NOTE(review): presumably this happens when the GPU ops were
        # built without Machete support -- confirm against the build scripts.
        pass
|
||||
|
||||
|
||||
def get_pack_factor(num_bits):
|
||||
|
@@ -34,12 +34,6 @@ from ..utils import get_tensor
|
||||
from .quant_base import QuantConfigBase, QuantMethodBase
|
||||
|
||||
|
||||
def get_sm_version():
    """Return the CUDA compute capability encoded as a single integer.

    The value is ``major * 10 + minor`` of the properties object returned by
    ``paddle.device.cuda.get_device_properties()`` called with no arguments,
    so compute capability 9.0 yields 90.
    """
    device_props = paddle.device.cuda.get_device_properties()
    return device_props.major * 10 + device_props.minor
|
||||
|
||||
|
||||
class WeightOnlyConfig(QuantConfigBase):
|
||||
"""
|
||||
Quantization config for weight only
|
||||
@@ -139,10 +133,14 @@ class WeightOnlyConfig(QuantConfigBase):
|
||||
else:
|
||||
raise ValueError(f"Unsupported MOE backend {layer.use_method}")
|
||||
else:
|
||||
from fastdeploy.model_executor.layers.quantization.ops.machete_mm import (
|
||||
_ENABLE_MACHETE,
|
||||
)
|
||||
|
||||
if (
|
||||
self.name() == "wint4"
|
||||
and _ENABLE_MACHETE
|
||||
and envs.FD_USE_MACHETE == "1"
|
||||
and get_sm_version() == 90
|
||||
and layer.weight_shape[1]
|
||||
and layer.weight_shape[1] % 128 == 0
|
||||
):
|
||||
|
Reference in New Issue
Block a user