[Inference, rename] remove padding_offsets from attention; use batch_id_per_token instead (#2880)

* remove padding_offsets from atten
2025-10-05 08:37:06 +08:00 · 2025-07-17 18:41:31 +08:00
parent d49f8fb30a
commit ddb10ac509
50 changed files with 311 additions and 288 deletions
--- a/fastdeploy/model_executor/forward_meta.py
+++ b/fastdeploy/model_executor/forward_meta.py
@@ -85,8 +85,8 @@ class ForwardMeta():

    # Accumulated offset
    cum_offsets: Optional[paddle.Tensor] = None
-    # Offset tensor, used to restore the position of ids_remove_madding after padding removal to the original input_ids
-    padding_offset: Optional[paddle.Tensor] = None
+    # batch_id_per_token tensor, used to indicate which batch each token belongs to after padding is removed from the original input_ids
+    batch_id_per_token: Optional[paddle.Tensor] = None
    # Accumulated sequence length of query
    cu_seqlens_q: Optional[paddle.Tensor] = None
    # Accumulated sequence length of key