Revert "【New Feature】W4afp8 supports per group quantization (#4272)" (#4854)

This reverts commit 93fcf7e4ec.
2025-12-24 13:28:13 +08:00 · 2025-11-06 17:48:28 +08:00
parent 3478d20262
commit 819b2dbbae
26 changed files with 1718 additions and 4378 deletions
--- a/fastdeploy/model_executor/layers/quantization/mix_quant.py
+++ b/fastdeploy/model_executor/layers/quantization/mix_quant.py
@@ -39,7 +39,6 @@ class MixQuantConfig(QuantConfigBase):
        is_permuted: bool = True,
        is_quantized: bool = False,
        hadamard_block_size: int = 128,
-        moe_dynamic_quant: bool = False,
    ) -> None:
        super().__init__()
        self.dense_quant_type = dense_quant_type
@@ -58,7 +57,6 @@ class MixQuantConfig(QuantConfigBase):
        self.is_checkpoint_bf16 = not is_quantized
        self.is_quantized = is_quantized
        self.hadamard_block_size = hadamard_block_size
-        self.moe_dynamic_quant = moe_dynamic_quant

    def name(self) -> str:
        return "mix_quant"
@@ -75,7 +73,6 @@ class MixQuantConfig(QuantConfigBase):
            config.get("is_permuted", True),
            config.get("is_quantized", False),
            config.get("hadamard_block_size", 128),
-            config.get("moe_dynamic_quant", False),
        )

    def get_quant_method(self, layer) -> Optional[QuantMethodBase]: