[v1 loader] Qwen offline fp8 (#4036)

* support offline fp8

* update ut

* update ut

* update ut

* fix

* update

* update
This commit is contained in:
bukejiyu
2025-09-15 13:44:11 +08:00
committed by GitHub
parent b1a5b756a3
commit 29ed617f0f
21 changed files with 440 additions and 138 deletions

View File

@@ -37,7 +37,7 @@ class MixQuantConfig(QuantConfigBase):
is_channel_wise: bool = False,
has_zero_point: bool = False,
is_permuted: bool = True,
is_checkpoint_bf16: bool = False,
is_quantized: bool = False,
hadamard_block_size: int = 128,
) -> None:
super().__init__()
@@ -54,7 +54,8 @@ class MixQuantConfig(QuantConfigBase):
self.quant_min_bound = 0
self.quant_round_type = 0
self.is_permuted = is_permuted
self.is_checkpoint_bf16 = is_checkpoint_bf16
self.is_checkpoint_bf16 = not is_quantized
self.is_quantized = is_quantized
self.hadamard_block_size = hadamard_block_size
def name(self) -> str:
@@ -70,7 +71,7 @@ class MixQuantConfig(QuantConfigBase):
config.get("is_channel_wise", False),
config.get("has_zero_point", False),
config.get("is_permuted", True),
config.get("is_checkpoint_bf16", False),
config.get("is_quantized", False),
config.get("hadamard_block_size", 128),
)
@@ -82,7 +83,7 @@ class MixQuantConfig(QuantConfigBase):
.from_config(
{
"is_permuted": self.is_permuted,
"is_checkpoint_bf16": self.is_checkpoint_bf16,
"is_quantized": self.is_quantized,
"hadamard_block_size": self.hadamard_block_size,
}
)
@@ -94,7 +95,7 @@ class MixQuantConfig(QuantConfigBase):
.from_config(
{
"is_permuted": self.is_permuted,
"is_checkpoint_bf16": self.is_checkpoint_bf16,
"is_quantized": self.is_quantized,
"hadamard_block_size": self.hadamard_block_size,
}
)
@@ -112,6 +113,6 @@ class MixQuantConfig(QuantConfigBase):
else:
return (
get_quantization_config(self.dense_quant_type)
.from_config({"is_checkpoint_bf16": self.is_checkpoint_bf16})
.from_config({"is_quantized": self.is_quantized})
.get_quant_method(layer)
)