mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
【New Feature】W4afp8 supports per-group quantization (#4272)
* w4afp8: support per-group quantization * code style * accuracy verified * revert append attn utils * ffn1: dynamic quantization * ffn2: support dynamic quantization * code style * code style * update unit tests * update unit tests * fix bug * Implement conditional parameter creation for layers Add parameter creation for up_gate_proj_in_scale when ep_size > 1. * code style * fix conflict * code style * code style * fix w4aint8 accuracy * fix ci --------- Co-authored-by: yuanxiaolan <yuanxiaolan01@baidu.com>
This commit is contained in:
@@ -39,6 +39,7 @@ class MixQuantConfig(QuantConfigBase):
|
||||
is_permuted: bool = True,
|
||||
is_quantized: bool = False,
|
||||
hadamard_block_size: int = 128,
|
||||
moe_dynamic_quant: bool = False,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.dense_quant_type = dense_quant_type
|
||||
@@ -57,6 +58,7 @@ class MixQuantConfig(QuantConfigBase):
|
||||
self.is_checkpoint_bf16 = not is_quantized
|
||||
self.is_quantized = is_quantized
|
||||
self.hadamard_block_size = hadamard_block_size
|
||||
self.moe_dynamic_quant = moe_dynamic_quant
|
||||
|
||||
def name(self) -> str:
|
||||
return "mix_quant"
|
||||
@@ -73,6 +75,7 @@ class MixQuantConfig(QuantConfigBase):
|
||||
config.get("is_permuted", True),
|
||||
config.get("is_quantized", False),
|
||||
config.get("hadamard_block_size", 128),
|
||||
config.get("moe_dynamic_quant", False),
|
||||
)
|
||||
|
||||
def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
|
||||
|
||||
Reference in New Issue
Block a user