[NewFeatures] support eplb (#3547)

* [NewFeatures] support eplb
* fix eplb
2025-10-05 08:37:06 +08:00 · 2025-08-26 16:19:30 +08:00
parent 56e2d7e668
commit 9afa236e39
17 changed files with 174 additions and 67 deletions
--- a/fastdeploy/model_executor/layers/quantization/block_wise_fp8.py
+++ b/fastdeploy/model_executor/layers/quantization/block_wise_fp8.py
@@ -108,7 +108,7 @@ class BlockWiseFP8LinearMethod(QuantMethodBase):
        layer.weight.copy_(quanted_weight_tensor, False)
        layer.weight_scale.set_value(weight_block_scale_tensor)

-    def process_prequanted_weights(self, layer, state_dict):
+    def process_prequanted_weights(self, layer, state_dict, is_rearrange: bool = False):
        """
        process_prequanted_weights
        """
--- a/fastdeploy/model_executor/layers/quantization/tensor_wise_fp8.py
+++ b/fastdeploy/model_executor/layers/quantization/tensor_wise_fp8.py
@@ -90,7 +90,7 @@ class TensorWiseFP8LinearMethod(QuantMethodBase):
            default_initializer=paddle.nn.initializer.Constant(0),
        )

-    def process_prequanted_weights(self, layer, state_dict) -> None:
+    def process_prequanted_weights(self, layer, state_dict, is_rearrange: bool = False) -> None:
        """
        Process pre-quantized weights before applying them to the model
        Args:
--- a/fastdeploy/model_executor/layers/quantization/weight_only.py
+++ b/fastdeploy/model_executor/layers/quantization/weight_only.py
@@ -305,7 +305,7 @@ class GPUWeightOnlyLinearMethod(WeightOnlyLinearMethod):
    ) -> None:
        super().__init__(quant_config)

-    def process_prequanted_weights(self, layer, state_dict) -> None:
+    def process_prequanted_weights(self, layer, state_dict, is_rearrange: bool = False) -> None:
        """
        Process pre-quantized weights before applying them to the model
        Args: