mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[Sync] Update to latest code (#2679)
* [Sync] Update to latest code
* Add new code files
* Add new code files
* update code
* Try to fix build.sh
* Try to fix build.sh
* Update code
* Update requirements.txt
* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
@@ -46,6 +46,8 @@ class AttentionBackend(ABC):
|
||||
k: paddle.Tensor,
|
||||
v: paddle.Tensor,
|
||||
qkv: paddle.Tensor,
|
||||
compressed_kv: paddle.Tensor,
|
||||
k_pe: paddle.Tensor,
|
||||
layer: paddle.nn.Layer,
|
||||
forward_meta: ForwardMeta,
|
||||
) -> paddle.Tensor:
|
||||
@@ -56,6 +58,8 @@ class AttentionBackend(ABC):
|
||||
k: The key tensor.
|
||||
v: The value tensor.
|
||||
layer: The layer that will be used for the forward.
|
||||
compressed_kv: Optional compressed key-value cache (for MLA).
|
||||
k_pe: Optional key positional encoding (for MLA).
|
||||
forward_meta: The forward metadata.
|
||||
"""
|
||||
if forward_meta.forward_mode.is_mixed():
|
||||
@@ -64,6 +68,8 @@ class AttentionBackend(ABC):
|
||||
k,
|
||||
v,
|
||||
qkv,
|
||||
compressed_kv,
|
||||
k_pe,
|
||||
layer,
|
||||
forward_meta,
|
||||
)
|
||||
@@ -73,6 +79,8 @@ class AttentionBackend(ABC):
|
||||
k,
|
||||
v,
|
||||
qkv,
|
||||
compressed_kv,
|
||||
k_pe,
|
||||
layer,
|
||||
forward_meta,
|
||||
)
|
||||
@@ -82,6 +90,8 @@ class AttentionBackend(ABC):
|
||||
k,
|
||||
v,
|
||||
qkv,
|
||||
compressed_kv,
|
||||
k_pe,
|
||||
layer,
|
||||
forward_meta,
|
||||
)
|
||||
@@ -92,6 +102,8 @@ class AttentionBackend(ABC):
|
||||
k: paddle.Tensor,
|
||||
v: paddle.Tensor,
|
||||
qkv: paddle.Tensor,
|
||||
compressed_kv: paddle.Tensor,
|
||||
k_pe: paddle.Tensor,
|
||||
layer: paddle.nn.Layer,
|
||||
forward_meta: ForwardMeta,
|
||||
) -> paddle.Tensor:
|
||||
@@ -104,6 +116,8 @@ class AttentionBackend(ABC):
|
||||
k: paddle.Tensor,
|
||||
v: paddle.Tensor,
|
||||
qkv: paddle.Tensor,
|
||||
compressed_kv: paddle.Tensor,
|
||||
k_pe: paddle.Tensor,
|
||||
layer: paddle.nn.Layer,
|
||||
forward_meta: ForwardMeta,
|
||||
) -> paddle.Tensor:
|
||||
@@ -116,6 +130,8 @@ class AttentionBackend(ABC):
|
||||
k: paddle.Tensor,
|
||||
v: paddle.Tensor,
|
||||
qkv: paddle.Tensor,
|
||||
compressed_kv: paddle.Tensor,
|
||||
k_pe: paddle.Tensor,
|
||||
layer: paddle.nn.Layer,
|
||||
forward_meta: ForwardMeta,
|
||||
) -> paddle.Tensor:
|
||||
|
Reference in New Issue
Block a user