[v1 loader] Qwen offline fp8 (#4036)

* support offline fp8

* update ut

* update ut

* update ut

* fix

* update

* update
This commit is contained in:
bukejiyu
2025-09-15 13:44:11 +08:00
committed by GitHub
parent b1a5b756a3
commit 29ed617f0f
21 changed files with 440 additions and 138 deletions

View File

@@ -37,7 +37,7 @@ class MixQuantConfig(QuantConfigBase):
is_channel_wise: bool = False,
has_zero_point: bool = False,
is_permuted: bool = True,
is_checkpoint_bf16: bool = False,
is_quantized: bool = False,
hadamard_block_size: int = 128,
) -> None:
super().__init__()
@@ -54,7 +54,8 @@ class MixQuantConfig(QuantConfigBase):
self.quant_min_bound = 0
self.quant_round_type = 0
self.is_permuted = is_permuted
self.is_checkpoint_bf16 = is_checkpoint_bf16
self.is_checkpoint_bf16 = not is_quantized
self.is_quantized = is_quantized
self.hadamard_block_size = hadamard_block_size
def name(self) -> str:
@@ -70,7 +71,7 @@ class MixQuantConfig(QuantConfigBase):
config.get("is_channel_wise", False),
config.get("has_zero_point", False),
config.get("is_permuted", True),
config.get("is_checkpoint_bf16", False),
config.get("is_quantized", False),
config.get("hadamard_block_size", 128),
)
@@ -82,7 +83,7 @@ class MixQuantConfig(QuantConfigBase):
.from_config(
{
"is_permuted": self.is_permuted,
"is_checkpoint_bf16": self.is_checkpoint_bf16,
"is_quantized": self.is_quantized,
"hadamard_block_size": self.hadamard_block_size,
}
)
@@ -94,7 +95,7 @@ class MixQuantConfig(QuantConfigBase):
.from_config(
{
"is_permuted": self.is_permuted,
"is_checkpoint_bf16": self.is_checkpoint_bf16,
"is_quantized": self.is_quantized,
"hadamard_block_size": self.hadamard_block_size,
}
)
@@ -112,6 +113,6 @@ class MixQuantConfig(QuantConfigBase):
else:
return (
get_quantization_config(self.dense_quant_type)
.from_config({"is_checkpoint_bf16": self.is_checkpoint_bf16})
.from_config({"is_quantized": self.is_quantized})
.get_quant_method(layer)
)