[Inference, rename] Remove padding_offsets from attention; use batch_id_per_token instead (#2880)

* Remove padding_offsets from attention
This commit is contained in:
周周周
2025-07-17 18:41:31 +08:00
committed by GitHub
parent d49f8fb30a
commit ddb10ac509
50 changed files with 311 additions and 288 deletions

View File

@@ -216,7 +216,7 @@ class AppendAttentionBackend(AttentionBackend):
forward_meta.seq_lens_encoder,
forward_meta.seq_lens_decoder,
forward_meta.seq_lens_this_time,
- forward_meta.padding_offset,
+ forward_meta.batch_id_per_token,
forward_meta.cu_seqlens_q,
metadata.block_tables,
metadata.encoder_batch_ids,

View File

@@ -32,7 +32,7 @@ def append_attention(
seq_lens_encoder: paddle.Tensor,
seq_lens_decoder: paddle.Tensor,
seq_lens_this_time: paddle.Tensor,
- padding_offsets: paddle.Tensor,
+ batch_id_per_token: paddle.Tensor,
cu_seqlens_q: paddle.Tensor,
block_tables: paddle.Tensor,
encoder_batch_ids: paddle.Tensor,
@@ -86,7 +86,7 @@ def append_attention(
seq_lens_encoder,
seq_lens_decoder,
seq_lens_this_time,
- padding_offsets,
+ batch_id_per_token,
cu_seqlens_q,
block_tables,
encoder_batch_ids,