mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
polish code with new pre-commit rule (#2923)
@@ -13,14 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """

 from typing import Optional

 import paddle

 from fastdeploy.model_executor.layers.quantization.ops import (
-    cutlass_scaled_mm, scaled_fp8_quant)
+    cutlass_scaled_mm,
+    scaled_fp8_quant,
+)
 from fastdeploy.model_executor.layers.quantization.quant_base import (
-    QuantConfigBase, QuantMethodBase)
+    QuantConfigBase,
+    QuantMethodBase,
+)


 class WFP8AFP8Config(QuantConfigBase):
@@ -37,21 +42,18 @@ class WFP8AFP8Config(QuantConfigBase):
         self.quant_round_type = 1

     def name(self) -> str:
-        """
-        """
+        """ """
         return "wfp8afp8"

     @classmethod
     def from_config(cls, config: dict) -> "WFP8AFP8Config":
-        """
-        """
+        """ """
         weight_scale_dict = config.get("weight_scale_dict", None)
         act_scale_dict = config.get("act_scale_dict", None)
         return cls(weight_scale_dict, act_scale_dict)

     def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
-        """
-        """
+        """ """
         return WFP8AFP8LinearMethod(self)

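For orientation, a minimal usage sketch of the config surface touched above, assuming a dict-shaped quantization config; `layer` is a placeholder for a FastDeploy linear layer, not code from this commit:

    cfg = WFP8AFP8Config.from_config({"weight_scale_dict": None, "act_scale_dict": None})
    assert cfg.name() == "wfp8afp8"
    quant_method = cfg.get_quant_method(layer)  # -> WFP8AFP8LinearMethod(cfg)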
@@ -68,8 +70,7 @@ class WFP8AFP8LinearMethod(QuantMethodBase):
         self.quant_config = quant_config

     def create_weights(self, layer):
-        """
-        """
+        """ """
         layer.weight_shape.reverse()
         layer.weight_dtype = "float8_e4m3fn"
         # TODO(YuanRisheng): set weight logic should be moved to process_loaded_weights func
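A note on create_weights: weight_shape.reverse() flips a two-element shape list into the [out_features, in_features] layout that the transposed matmul in apply() expects. A hedged illustration, where the upstream [in_features, out_features] order and the concrete sizes are assumptions:

    weight_shape = [4096, 12288]  # assumed [in_features, out_features]
    weight_shape.reverse()        # -> [12288, 4096]; pairs with paddle.matmul(x, weight, False, True)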
@@ -82,8 +83,7 @@ class WFP8AFP8LinearMethod(QuantMethodBase):
         )

     def process_loaded_weights(self, layer, weights) -> None:
-        """
-        """
+        """ """
         if self.skip_quant:
             weight_tensor = weights.cast(layer._dtype)
             layer.weight.set_value(weight_tensor)
@@ -99,18 +99,21 @@ class WFP8AFP8LinearMethod(QuantMethodBase):
         layer.weight_scale.set_value(weight_scale)

     def apply(self, layer, x):
-        """
-        """
+        """ """
         if self.skip_quant:
             linear_out = paddle.matmul(x, layer.weight, False, True)
             return linear_out
         if self.use_per_token_if_dynamic:
             out_type = x.dtype
-            a_q, a_scales = scaled_fp8_quant(
-                x, use_per_token_if_dynamic=self.use_per_token_if_dynamic)
-            linear_out = cutlass_scaled_mm(a_q, layer.weight, a_scales,
-                                           layer.weight_scale, out_type,
-                                           layer.bias)
+            a_q, a_scales = scaled_fp8_quant(x, use_per_token_if_dynamic=self.use_per_token_if_dynamic)
+            linear_out = cutlass_scaled_mm(
+                a_q,
+                layer.weight,
+                a_scales,
+                layer.weight_scale,
+                out_type,
+                layer.bias,
+            )
         else:
             raise NotImplementedError
         return linear_out
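The per-token branch in apply() is easiest to read as plain math: scaled_fp8_quant picks one scale per row of x so each row fits the float8_e4m3 range, and cutlass_scaled_mm multiplies the quantized operands and folds both scales (and the bias) back into the output. Below is a minimal NumPy sketch of that math, with the fp8 cast emulated by rounding; per_token_quant and scaled_mm are illustrative names, not FastDeploy APIs:

    import numpy as np

    FP8_E4M3_MAX = 448.0  # largest finite float8_e4m3 value

    def per_token_quant(x):
        # one scale per row so that x / scale spans the fp8 range
        scale = np.abs(x).max(axis=-1, keepdims=True) / FP8_E4M3_MAX
        return np.round(x / scale), scale  # round() stands in for the fp8 cast

    def scaled_mm(a_q, w_q, a_scale, w_scale, bias=None):
        # dequantize on output: (a_q @ w_q.T) * a_scale * w_scale, then + bias
        out = (a_q @ w_q.T) * a_scale * w_scale
        return out if bias is None else out + bias

    x = np.random.randn(4, 64).astype(np.float32)   # 4 tokens, hidden size 64
    w = np.random.randn(32, 64).astype(np.float32)  # weight already [out, in]
    x_q, x_scale = per_token_quant(x)               # per-token activation scales
    w_scale = np.abs(w).max() / FP8_E4M3_MAX        # per-tensor weight scale
    out = scaled_mm(x_q, np.round(w / w_scale), x_scale, w_scale)
    # out approximates x @ w.T in float32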