mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-18 22:44:39 +08:00
[Sync] Update to latest code (#2679)
* [Sync] Update to latest code
* Add new code files
* Add new code files
* update code
* Try to fix build.sh
* Try to fix build.sh
* Update code
* Update requirements.txt
* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
@@ -111,6 +111,8 @@ class Attention(nn.Layer):
|
||||
k: paddle.Tensor = None,
|
||||
v: paddle.Tensor = None,
|
||||
qkv: paddle.Tensor = None,
|
||||
compressed_kv: paddle.Tensor = None,
|
||||
k_pe: paddle.Tensor = None,
|
||||
forward_meta: ForwardMeta = None,
|
||||
) -> paddle.Tensor:
|
||||
"""
|
||||
@@ -120,12 +122,16 @@ class Attention(nn.Layer):
|
||||
k: the key tensor
|
||||
v: the value tensor
|
||||
forward_meta: the forward meta data
|
||||
compressed_kv: optional compressed key-value cache (for MLA)
|
||||
k_pe: optional key positional encoding (for MLA)
|
||||
"""
|
||||
return forward_meta.attn_backend.forward(
|
||||
q,
|
||||
k,
|
||||
v,
|
||||
qkv,
|
||||
compressed_kv,
|
||||
k_pe,
|
||||
self,
|
||||
forward_meta,
|
||||
)
|
||||
|
Reference in New Issue
Block a user