Sync v2.0 version of code to GitHub repo

2025-11-02 20:54:03 +08:00 · 2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions
--- a/fastdeploy/model_executor/layers/attention/ops/init.py
+++ b/fastdeploy/model_executor/layers/attention/ops/init.py
@@ -14,10 +14,13 @@
 # limitations under the License.
 """

-from .get_block_shape_and_split_kv_block import get_block_shape_and_split_kv_block
 from .append_attention import append_attention
+from .get_block_shape_and_split_kv_block import \
+    get_block_shape_and_split_kv_block
+from .init_signal_layerwise import init_signal_layerwise
+from .open_shm_and_get_meta_signal import open_shm_and_get_meta_signal

 __all__ = [
-    "get_block_shape_and_split_kv_block",
-    "append_attention"
-]
+    "get_block_shape_and_split_kv_block", "append_attention",
+    "open_shm_and_get_meta_signal", "init_signal_layerwise"
+]
--- a/fastdeploy/model_executor/layers/attention/ops/append_attention.py
+++ b/fastdeploy/model_executor/layers/attention/ops/append_attention.py
@@ -14,10 +14,16 @@
 # limitations under the License.
 """

-import paddle
 from typing import Optional
+
+import paddle
+
 from fastdeploy.platforms import current_platform

+if current_platform.is_cuda():
+    from fastdeploy.model_executor.ops.gpu import \
+        append_attention as append_attention_gpu
+

 def append_attention(
    qkv: paddle.Tensor,
@@ -68,14 +74,12 @@ def append_attention(
    speculate_max_draft_token_num: int = 1,
    causal: bool = True,
    speculate_decoder: bool = False,
-):
+) -> paddle.Tensor:
    """
-    Args:
-    Returns:
+    append_attention
    """
    if current_platform.is_cuda():
-        from fastdeploy.model_executor.ops.gpu import append_attention
-        out = append_attention(
+        out = append_attention_gpu(
            qkv,
            key_cache,
            value_cache,
--- a/fastdeploy/model_executor/layers/attention/ops/init_signal_layerwise.py
+++ b/fastdeploy/model_executor/layers/attention/ops/init_signal_layerwise.py
@@ -0,0 +1,34 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import paddle
+
+from fastdeploy.platforms import current_platform
+
+
+def init_signal_layerwise(
+    kv_signal_metadata: paddle.Tensor,
+    layer_id: int = 0,
+) -> paddle.Tensor:
+    """
+    Forward to the same-named GPU op; raises NotImplementedError off CUDA.
+    """
+    if current_platform.is_cuda():
+        from fastdeploy.model_executor.ops.gpu import init_signal_layerwise
+        out = init_signal_layerwise(kv_signal_metadata, layer_id)
+        return out
+    else:
+        raise NotImplementedError()
--- a/fastdeploy/model_executor/layers/attention/ops/open_shm_and_get_meta_signal.py
+++ b/fastdeploy/model_executor/layers/attention/ops/open_shm_and_get_meta_signal.py
@@ -0,0 +1,35 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+import paddle
+
+from fastdeploy.platforms import current_platform
+
+
+def open_shm_and_get_meta_signal(
+    rank: int = 0,
+    device_id: int = 0,
+    keep_pd_step_flag: bool = False,
+) -> paddle.Tensor:
+    """
+    Forward to the same-named GPU op; raises NotImplementedError off CUDA.
+    """
+    if current_platform.is_cuda():
+        from fastdeploy.model_executor.ops.gpu import \
+            open_shm_and_get_meta_signal
+        out = open_shm_and_get_meta_signal(rank, device_id, keep_pd_step_flag)
+        return out
+    else:
+        raise NotImplementedError()