polish code with new pre-commit rule (#2923)

2025-10-28 10:51:39 +08:00 · 2025-07-19 23:19:27 +08:00
parent b8676d71a8
commit 25698d56d1
424 changed files with 14307 additions and 13518 deletions
--- a/fastdeploy/model_executor/layers/sample/ops/__init__.py
+++ b/fastdeploy/model_executor/layers/sample/ops/__init__.py
@@ -15,7 +15,9 @@
 """

 from .apply_penalty_multi_scores import (
-    apply_penalty_multi_scores, apply_speculative_penalty_multi_scores)
+    apply_penalty_multi_scores,
+    apply_speculative_penalty_multi_scores,
+)
 from .top_k_top_p_sampling import top_k_top_p_sampling

 __all__ = [
--- a/fastdeploy/model_executor/layers/sample/ops/apply_penalty_multi_scores.py
+++ b/fastdeploy/model_executor/layers/sample/ops/apply_penalty_multi_scores.py
@@ -37,8 +37,8 @@ def apply_penalty_multi_scores(
    apply_penalty_multi_scores
    """
    if current_platform.is_cuda():
-        from fastdeploy.model_executor.ops.gpu import \
-            get_token_penalty_multi_scores
+        from fastdeploy.model_executor.ops.gpu import get_token_penalty_multi_scores
+
        logits = get_token_penalty_multi_scores(
            pre_token_ids,
            prompt_ids,
@@ -54,8 +54,8 @@ def apply_penalty_multi_scores(
            eos_token_ids,
        )
    elif current_platform.is_xpu():
-        from fastdeploy.model_executor.ops.xpu import \
-            get_token_penalty_multi_scores
+        from fastdeploy.model_executor.ops.xpu import get_token_penalty_multi_scores
+
        logits = get_token_penalty_multi_scores(
            pre_token_ids,
            logits,
@@ -69,8 +69,10 @@ def apply_penalty_multi_scores(
            eos_token_ids,
        )
    elif current_platform.is_iluvatar():
-        from fastdeploy.model_executor.ops.iluvatar import \
-            get_token_penalty_multi_scores
+        from fastdeploy.model_executor.ops.iluvatar import (
+            get_token_penalty_multi_scores,
+        )
+
        logits = get_token_penalty_multi_scores(
            pre_token_ids,
            prompt_ids,
@@ -86,8 +88,8 @@ def apply_penalty_multi_scores(
            eos_token_ids,
        )
    elif current_platform.is_gcu():
-        from fastdeploy.model_executor.ops.gcu import \
-            get_token_penalty_multi_scores
+        from fastdeploy.model_executor.ops.gcu import get_token_penalty_multi_scores
+
        logits = get_token_penalty_multi_scores(
            pre_token_ids,
            logits,
@@ -101,7 +103,7 @@ def apply_penalty_multi_scores(
            eos_token_ids,
        )
    else:
-        raise NotImplementedError()
+        raise NotImplementedError

    return logits

@@ -126,8 +128,9 @@ def apply_speculative_penalty_multi_scores(
    apply_speculative_penalty_multi_scores
    """
    if current_platform.is_cuda():
-        from fastdeploy.model_executor.ops.gpu import \
-            speculate_get_token_penalty_multi_scores
+        from fastdeploy.model_executor.ops.gpu import (
+            speculate_get_token_penalty_multi_scores,
+        )

        speculate_get_token_penalty_multi_scores(
            pre_token_ids,
@@ -146,6 +149,6 @@ def apply_speculative_penalty_multi_scores(
            max_len,
        )
    else:
-        raise NotImplementedError()
+        raise NotImplementedError
    # inplace
    return logits
--- a/fastdeploy/model_executor/layers/sample/ops/top_k_top_p_sampling.py
+++ b/fastdeploy/model_executor/layers/sample/ops/top_k_top_p_sampling.py
@@ -22,8 +22,8 @@ from fastdeploy import envs
 from fastdeploy.platforms import current_platform

 if current_platform.is_gcu():
-    from fastdeploy.model_executor.ops.gcu import \
-        top_p_sampling as gcu_top_p_sampling
+    from fastdeploy.model_executor.ops.gcu import top_p_sampling as gcu_top_p_sampling
+

 def top_k_top_p_sampling(
    x: paddle.Tensor,
@@ -33,8 +33,8 @@ def top_k_top_p_sampling(
    topp_seed: Optional[paddle.Tensor] = None,
    seed: int = -1,
    k: int = 0,
-    mode: Literal['truncated', 'non-truncated'] = "truncated",
-    order: Literal['top_k_first', 'joint'] = "top_k_first",
+    mode: Literal["truncated", "non-truncated"] = "truncated",
+    order: Literal["top_k_first", "joint"] = "top_k_first",
 ) -> tuple[paddle.Tensor, paddle.Tensor]:
    """
    x(Tensor): An input 2-D Tensor with type float32, float16 and bfloat16.
@@ -61,35 +61,33 @@ def top_k_top_p_sampling(
    """
    top_p_class = envs.FD_SAMPLING_CLASS.lower()
    if top_p_class == "air":
-        _, ids = air_top_p_sampling(x,
-                                    top_p,
-                                    threshold,
-                                    topp_seed,
-                                    seed=seed,
-                                    k=k,
-                                    mode=mode)
+        _, ids = air_top_p_sampling(x, top_p, threshold, topp_seed, seed=seed, k=k, mode=mode)
    elif top_p_class == "rejection":
        ids = rejection_top_p_sampling(x, top_p, top_k, seed, order)
        _ = None
    elif top_p_class == "base_non_truncated":
-        _, ids = paddle.tensor.top_p_sampling(x,
-                                                top_p,
-                                                threshold=threshold,
-                                                topp_seed=topp_seed,
-                                                seed=seed,
-                                                k=k,
-                                                mode="non-truncated")
+        _, ids = paddle.tensor.top_p_sampling(
+            x,
+            top_p,
+            threshold=threshold,
+            topp_seed=topp_seed,
+            seed=seed,
+            k=k,
+            mode="non-truncated",
+        )
    else:
        if current_platform.is_gcu():
            _, ids = gcu_top_p_sampling(x, top_p)
        else:
-            _, ids = paddle.tensor.top_p_sampling(x,
-                                                  top_p,
-                                                  threshold=threshold,
-                                                  topp_seed=topp_seed,
-                                                  seed=seed,
-                                                  k=k,
-                                                  mode="truncated")
+            _, ids = paddle.tensor.top_p_sampling(
+                x,
+                top_p,
+                threshold=threshold,
+                topp_seed=topp_seed,
+                seed=seed,
+                k=k,
+                mode="truncated",
+            )
    return _, ids


@@ -100,15 +98,15 @@ def air_top_p_sampling(
    topp_seed: Optional[paddle.Tensor] = None,
    seed: int = -1,
    k: int = 0,
-    mode: Literal['truncated', 'non-truncated'] = "truncated",
+    mode: Literal["truncated", "non-truncated"] = "truncated",
 ) -> tuple[paddle.Tensor, paddle.Tensor]:
    """
    air_top_p_sampling
    """
    try:
        from fastdeploy.model_executor.ops.gpu import air_top_p_sampling
-        out, ids = air_top_p_sampling(x, top_p, threshold, topp_seed, seed, k,
-                                      mode)
+
+        out, ids = air_top_p_sampling(x, top_p, threshold, topp_seed, seed, k, mode)
    except ImportError:
        raise RuntimeError("Cannot import air_top_p_sampling op.")
    return out, ids
@@ -119,14 +117,16 @@ def rejection_top_p_sampling(
    top_p: paddle.Tensor,
    top_k: paddle.Tensor,
    seed: int = -1,
-    order: Literal['top_k_first', 'joint'] = "top_k_first",
+    order: Literal["top_k_first", "joint"] = "top_k_first",
 ) -> paddle.Tensor:
    """
    rejection_top_p_sampling
    """
    try:
        from fastdeploy.model_executor.ops.gpu import (
-            rejection_top_p_sampling, top_k_renorm_probs)
+            rejection_top_p_sampling,
+            top_k_renorm_probs,
+        )

        if paddle.count_nonzero(top_k) == 0:
            ids = rejection_top_p_sampling(