mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Inference, rename] Remove padding_offsets from attention; use batch_id_per_token instead (#2880)
* Remove padding_offsets from attention
This commit is contained in:
@@ -216,7 +216,7 @@ class AppendAttentionBackend(AttentionBackend):
 forward_meta.seq_lens_encoder,
 forward_meta.seq_lens_decoder,
 forward_meta.seq_lens_this_time,
-forward_meta.padding_offset,
+forward_meta.batch_id_per_token,
 forward_meta.cu_seqlens_q,
 metadata.block_tables,
 metadata.encoder_batch_ids,
@@ -32,7 +32,7 @@ def append_attention(
 seq_lens_encoder: paddle.Tensor,
 seq_lens_decoder: paddle.Tensor,
 seq_lens_this_time: paddle.Tensor,
-padding_offsets: paddle.Tensor,
+batch_id_per_token: paddle.Tensor,
 cu_seqlens_q: paddle.Tensor,
 block_tables: paddle.Tensor,
 encoder_batch_ids: paddle.Tensor,
@@ -86,7 +86,7 @@ def append_attention(
 seq_lens_encoder,
 seq_lens_decoder,
 seq_lens_this_time,
-padding_offsets,
+batch_id_per_token,
 cu_seqlens_q,
 block_tables,
 encoder_batch_ids,
Reference in New Issue
Block a user