[Sync] Update to latest code (#2679)

* [Sync] Update to latest code

* Add new code files

* Add new code files

* Update code

* Try to fix build.sh

* Try to fix build.sh

* Update code

* Update requirements.txt

* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
Jiang-Jia-Jun
2025-07-03 15:43:53 +08:00
committed by GitHub
parent d222248d00
commit 05c670e593
95 changed files with 9916 additions and 1312 deletions

View File

@@ -46,6 +46,8 @@ class AttentionBackend(ABC):
k: paddle.Tensor,
v: paddle.Tensor,
qkv: paddle.Tensor,
compressed_kv: paddle.Tensor,
k_pe: paddle.Tensor,
layer: paddle.nn.Layer,
forward_meta: ForwardMeta,
) -> paddle.Tensor:
@@ -56,6 +58,8 @@ class AttentionBackend(ABC):
k: The key tensor.
v: The value tensor.
layer: The layer that will be used for the forward.
compressed_kv: Optional compressed key-value cache (for MLA).
k_pe: Optional key positional encoding (for MLA).
forward_meta: The forward metadata.
"""
if forward_meta.forward_mode.is_mixed():
@@ -64,6 +68,8 @@ class AttentionBackend(ABC):
k,
v,
qkv,
compressed_kv,
k_pe,
layer,
forward_meta,
)
@@ -73,6 +79,8 @@ class AttentionBackend(ABC):
k,
v,
qkv,
compressed_kv,
k_pe,
layer,
forward_meta,
)
@@ -82,6 +90,8 @@ class AttentionBackend(ABC):
k,
v,
qkv,
compressed_kv,
k_pe,
layer,
forward_meta,
)
@@ -92,6 +102,8 @@ class AttentionBackend(ABC):
k: paddle.Tensor,
v: paddle.Tensor,
qkv: paddle.Tensor,
compressed_kv: paddle.Tensor,
k_pe: paddle.Tensor,
layer: paddle.nn.Layer,
forward_meta: ForwardMeta,
) -> paddle.Tensor:
@@ -104,6 +116,8 @@ class AttentionBackend(ABC):
k: paddle.Tensor,
v: paddle.Tensor,
qkv: paddle.Tensor,
compressed_kv: paddle.Tensor,
k_pe: paddle.Tensor,
layer: paddle.nn.Layer,
forward_meta: ForwardMeta,
) -> paddle.Tensor:
@@ -116,6 +130,8 @@ class AttentionBackend(ABC):
k: paddle.Tensor,
v: paddle.Tensor,
qkv: paddle.Tensor,
compressed_kv: paddle.Tensor,
k_pe: paddle.Tensor,
layer: paddle.nn.Layer,
forward_meta: ForwardMeta,
) -> paddle.Tensor: