[Attention] remove cum_offsets from atten, and use cu_seqlens_q (#2870)

2025-10-13 04:13:58 +08:00 · 2025-07-16 20:10:57 +08:00
parent 42b80182e0
commit aa76085d1f
47 changed files with 237 additions and 260 deletions
--- a/fastdeploy/model_executor/layers/attention/ops/append_attention.py
+++ b/fastdeploy/model_executor/layers/attention/ops/append_attention.py
@@ -33,7 +33,7 @@ def append_attention(
    seq_lens_decoder: paddle.Tensor,
    seq_lens_this_time: paddle.Tensor,
    padding_offsets: paddle.Tensor,
-    cum_offsets: paddle.Tensor,
+    cu_seqlens_q: paddle.Tensor,
    block_tables: paddle.Tensor,
    encoder_batch_ids: paddle.Tensor,
    encoder_tile_ids_per_batch: paddle.Tensor,
@@ -87,7 +87,7 @@ def append_attention(
            seq_lens_decoder,
            seq_lens_this_time,
            padding_offsets,
-            cum_offsets,
+            cu_seqlens_q,
            block_tables,
            encoder_batch_ids,
            encoder_tile_ids_per_batch,