Sync the v2.0 version of the code to the GitHub repo

2025-10-05 00:33:03 +08:00 · 2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions
--- a/fastdeploy/model_executor/layers/attention/ops/append_attention.py
+++ b/fastdeploy/model_executor/layers/attention/ops/append_attention.py
@@ -14,10 +14,16 @@
 # limitations under the License.
 """

-import paddle
 from typing import Optional
+
+import paddle
+
 from fastdeploy.platforms import current_platform

+if current_platform.is_cuda():
+    from fastdeploy.model_executor.ops.gpu import \
+        append_attention as append_attention_gpu
+

 def append_attention(
    qkv: paddle.Tensor,
@@ -68,14 +74,12 @@ def append_attention(
    speculate_max_draft_token_num: int = 1,
    causal: bool = True,
    speculate_decoder: bool = False,
-):
+) -> paddle.Tensor:
    """
-    Args:
-    Returns:
+    append_attention
    """
    if current_platform.is_cuda():
-        from fastdeploy.model_executor.ops.gpu import append_attention
-        out = append_attention(
+        out = append_attention_gpu(
            qkv,
            key_cache,
            value_cache,