polish code with new pre-commit rule (#2923)

2025-10-05 08:37:06 +08:00 · 2025-07-19 23:19:27 +08:00
parent b8676d71a8
commit 25698d56d1
424 changed files with 14307 additions and 13518 deletions
--- a/fastdeploy/model_executor/layers/quantization/w4afp8.py
+++ b/fastdeploy/model_executor/layers/quantization/w4afp8.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
+
 from typing import Optional

 import paddle
@@ -69,13 +70,14 @@ class W4AFP8LinearMethod(QuantMethodBase):
        pass

    def process_loaded_weights(self, layer, weights) -> None:
-        quanted_weight_tensor, weight_scale_tensor = (
-            fastdeploy.model_executor.ops.gpu.
-            scaled_gemm_f8_i4_f16_weight_quantize(
-                paddle.cast(weights, "float32").cpu(),
-                groupsize=-1,
-                scale_dtype="float16",
-            ))
+        (
+            quanted_weight_tensor,
+            weight_scale_tensor,
+        ) = fastdeploy.model_executor.ops.gpu.scaled_gemm_f8_i4_f16_weight_quantize(
+            paddle.cast(weights, "float32").cpu(),
+            groupsize=-1,
+            scale_dtype="float16",
+        )
        weight_scale_tensor = paddle.view(weight_scale_tensor, layer._dtype)
        layer.weight.set_value(quanted_weight_tensor)
        layer.weight_scale.set_value(weight_scale_tensor)
@@ -87,11 +89,12 @@ class W4AFP8LinearMethod(QuantMethodBase):
            layer.weight_scale,
            zero_points=None,
            bias=layer.bias if layer.add_bias else None,
-            out_scale=self.quant_config.weight_scale_dict.get(layer.prefix +
-                                                              ".weight_scale")
-            / (self.quant_config.act_scale_dict.get(layer.prefix +
-                                                    ".activation_scale") *
-               QUANT_SCALING_FACTOR * QUANT_SCALING_FACTOR),
+            out_scale=self.quant_config.weight_scale_dict.get(layer.prefix + ".weight_scale")
+            / (
+                self.quant_config.act_scale_dict.get(layer.prefix + ".activation_scale")
+                * QUANT_SCALING_FACTOR
+                * QUANT_SCALING_FACTOR
+            ),
            groupsize=0,
            out_dtype=layer._dtype,
        )