[NewFeatures] support eplb (#3547)

* [NewFeatures] support eplb
* fix eplb
2025-10-02 23:32:48 +08:00 · 2025-08-26 16:19:30 +08:00
parent 56e2d7e668
commit 9afa236e39
17 changed files with 174 additions and 67 deletions
--- a/fastdeploy/model_executor/layers/quantization/block_wise_fp8.py
+++ b/fastdeploy/model_executor/layers/quantization/block_wise_fp8.py
@@ -108,7 +108,7 @@ class BlockWiseFP8LinearMethod(QuantMethodBase):
        layer.weight.copy_(quanted_weight_tensor, False)
        layer.weight_scale.set_value(weight_block_scale_tensor)

-    def process_prequanted_weights(self, layer, state_dict):
+    def process_prequanted_weights(self, layer, state_dict, is_rearrange: bool = False):
        """
        process_prequanted_weights
        """