load hadamard_block_size from config (#3797)

This commit is contained in:
Yuan Xiaolan
2025-09-05 17:07:58 +08:00
committed by GitHub
parent 41aee08982
commit 2cf55168ca
10 changed files with 60 additions and 30 deletions

View File

@@ -38,6 +38,7 @@ class MixQuantConfig(QuantConfigBase):
has_zero_point: bool = False,
is_permuted: bool = True,
is_checkpoint_bf16: bool = False,
hadamard_block_size: int = 128,
) -> None:
super().__init__()
self.dense_quant_type = dense_quant_type
@@ -54,6 +55,7 @@ class MixQuantConfig(QuantConfigBase):
self.quant_round_type = 0
self.is_permuted = is_permuted
self.is_checkpoint_bf16 = is_checkpoint_bf16
self.hadamard_block_size = hadamard_block_size
def name(self) -> str:
    """Return the identifier string of this quantization config.

    Returns:
        The constant string ``"mix_quant"``.
    """
    quant_name = "mix_quant"
    return quant_name
@@ -69,6 +71,7 @@ class MixQuantConfig(QuantConfigBase):
config.get("has_zero_point", False),
config.get("is_permuted", True),
config.get("is_checkpoint_bf16", False),
config.get("hadamard_block_size", 128),
)
def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
@@ -76,13 +79,25 @@ class MixQuantConfig(QuantConfigBase):
if layer.moe_tag == "Image":
return (
get_quantization_config(self.image_moe_quant_type)
.from_config({"is_permuted": self.is_permuted, "self.is_checkpoint_bf16": self.is_checkpoint_bf16})
.from_config(
{
"is_permuted": self.is_permuted,
"self.is_checkpoint_bf16": self.is_checkpoint_bf16,
"hadamard_block_size": self.hadamard_block_size,
}
)
.get_quant_method(layer)
)
else:
return (
get_quantization_config(self.moe_quant_type)
.from_config({"is_permuted": self.is_permuted, "self.is_checkpoint_bf16": self.is_checkpoint_bf16})
.from_config(
{
"is_permuted": self.is_permuted,
"self.is_checkpoint_bf16": self.is_checkpoint_bf16,
"hadamard_block_size": self.hadamard_block_size,
}
)
.get_quant_method(layer)
)
elif isinstance(layer, Attention):