mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-07 09:31:35 +08:00
[Optimize] Machete using group scale default (#4121)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
This commit is contained in:
@@ -161,7 +161,6 @@ class WeightOnlyConfig(QuantConfigBase):
|
||||
and envs.FD_USE_MACHETE == "1"
|
||||
and layer.weight_shape[1]
|
||||
and layer.weight_shape[1] % 128 == 0
|
||||
and not layer.add_bias
|
||||
):
|
||||
return MacheteWeightOnlyLinearMethod(self)
|
||||
return GPUWeightOnlyLinearMethod(self)
|
||||
@@ -244,7 +243,8 @@ class WeightOnlyLinearMethod(QuantMethodBase):
|
||||
)
|
||||
else:
|
||||
if isinstance(self, MacheteWeightOnlyLinearMethod):
|
||||
weight_scale_shape = [1, layer.weight_shape[1]]
|
||||
# Using group scale for machete, group size is 128
|
||||
weight_scale_shape = [(layer.weight_shape[0] + 127) // 128, layer.weight_shape[1]]
|
||||
if self.quant_config.name() == "wint4":
|
||||
layer.weight_shape[0] //= 8
|
||||
else:
|
||||
@@ -299,10 +299,12 @@ class WeightOnlyLinearMethod(QuantMethodBase):
|
||||
machete_quantize_and_pack,
|
||||
)
|
||||
|
||||
# Using group scale for machete, group size is 128
|
||||
quanted_weight_tensor, weight_scale_tensor = machete_quantize_and_pack(
|
||||
w=layer.weight,
|
||||
atype=layer._dtype,
|
||||
quant_type="uint4b8" if self.quant_config.name() == "wint4" else "uint8b128",
|
||||
group_size=128,
|
||||
)
|
||||
else:
|
||||
quanted_weight_tensor, weight_scale_tensor = weight_quantize(
|
||||
@@ -404,23 +406,27 @@ class MacheteWeightOnlyLinearMethod(WeightOnlyLinearMethod):
|
||||
machete_quantize_and_pack,
|
||||
)
|
||||
|
||||
# Using group scale for machete, group size is 128
|
||||
quanted_weight_tensor, weight_scale_tensor = machete_quantize_and_pack(
|
||||
w=weight,
|
||||
atype=layer._dtype,
|
||||
quant_type="uint4b8" if self.quant_config.name() == "wint4" else "uint8b128",
|
||||
group_size=128,
|
||||
)
|
||||
layer.weight.set_value(quanted_weight_tensor)
|
||||
layer.weight_scale.set_value(weight_scale_tensor.astype(paddle.get_default_dtype()))
|
||||
|
||||
def apply(self, layer, x):
|
||||
assert layer.bias is None, "Machete weight only linear method does not support bias."
|
||||
from fastdeploy.model_executor.layers.quantization.ops import machete_wint_mm
|
||||
|
||||
# Using group scale for machete, group size is 128
|
||||
linear_out = machete_wint_mm(
|
||||
x,
|
||||
w_prepack=layer.weight,
|
||||
w_g_s=layer.weight_scale,
|
||||
weight_dtype="uint4b8" if self.quant_config.name() == "wint4" else "uint8b128",
|
||||
group_size=128,
|
||||
)
|
||||
|
||||
if layer.with_bias:
|
||||
linear_out = paddle.add(linear_out, layer.bias)
|
||||
return linear_out
|
||||
|
@@ -135,6 +135,8 @@ class WeightOnlyInt4LinearTestCase(unittest.TestCase):
|
||||
weight_dtype="uint4b8" if self.weight_dtype == "int4" else "uint8b128", # weight_dtype
|
||||
group_size=self.machete_group_size,
|
||||
)
|
||||
if self.bias is not None:
|
||||
out = paddle.add(out, self.bias)
|
||||
return out.numpy()
|
||||
|
||||
def test_weight_only_linear(self):
|
||||
@@ -158,7 +160,7 @@ class WeightOnlyInt8LinearTestCase(unittest.TestCase):
|
||||
self.dtype = "float16"
|
||||
self.rtol = 1e-5
|
||||
self.atol = 1e-1
|
||||
self.bias = False
|
||||
self.bias = True
|
||||
self.batch = 1
|
||||
self.token = 512
|
||||
self.in_features = 7168
|
||||
@@ -224,6 +226,8 @@ class WeightOnlyInt8LinearTestCase(unittest.TestCase):
|
||||
weight_dtype="uint4b8" if self.weight_dtype == "int4" else "uint8b128", # weight_dtype
|
||||
group_size=self.machete_group_size,
|
||||
)
|
||||
if self.bias is not None:
|
||||
out = paddle.add(out, self.bias)
|
||||
return out.numpy()
|
||||
|
||||
def test_weight_only_linear(self):
|
||||
|
Reference in New Issue
Block a user