Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-04 16:22:57 +08:00
remove cum_offsets from ForwardMeta (#2925)
Some checks failed: Deploy GitHub Pages / deploy (push) has been cancelled
@@ -85,8 +85,6 @@ class ForwardMeta:
     # The sequence length processed in the current step
     seq_lens_this_time: Optional[paddle.Tensor] = None

-    # Accumulated offset
-    cum_offsets: Optional[paddle.Tensor] = None
     # batch_id_per_token tensor, used to indicate which token belongs which batch after padding removal to the original input_ids
     batch_id_per_token: Optional[paddle.Tensor] = None

     # Accumulated sequence length of query
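
For orientation, the two fields kept here already describe the token layout that cum_offsets (commented in the source only as "Accumulated offset") overlapped with: batch_id_per_token maps each flattened token back to its request, and cu_seqlens_q holds the cumulative query lengths. A minimal sketch of how both derive from seq_lens_this_time; the helper name and NumPy stand-in are assumptions, not FastDeploy code:

import numpy as np

def build_token_meta(seq_lens_this_time: np.ndarray):
    # batch_id_per_token: request index of every token after padding removal
    batch_id_per_token = np.repeat(
        np.arange(len(seq_lens_this_time)), seq_lens_this_time
    )
    # cu_seqlens_q: cumulative query lengths, [0, l0, l0+l1, ...]
    cu_seqlens_q = np.concatenate([[0], np.cumsum(seq_lens_this_time)])
    return batch_id_per_token, cu_seqlens_q

# Three requests contributing 2, 1, and 3 tokens this step:
bids, cu_q = build_token_meta(np.array([2, 1, 3]))
print(bids)  # [0 0 1 2 2 2]
print(cu_q)  # [0 2 3 6]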
@@ -112,7 +110,8 @@ class XPUForwardMeta(ForwardMeta):
     """
     XPUForwardMeta is used to store the global meta information of the forward, and some XPU specific meta info.
     """
-
+    # Accumulated offset
+    cum_offsets: Optional[paddle.Tensor] = None
     # TODO(wanghaitao): Supplementary notes
     #
     encoder_batch_map: Optional[paddle.Tensor] = None
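
The XPU-specific subclass keeps the field, so XPU kernels that still consume it are unaffected by the removal from the base class. A hedged sketch of one plausible reading of the "Accumulated offset" comment, assuming cum_offsets[i] counts the pad tokens that precede request i when sequences padded to max_len are flattened; the helper is hypothetical and the exact semantics are an assumption:

import numpy as np

def build_cum_offsets(seq_lens: np.ndarray, max_len: int) -> np.ndarray:
    padding = max_len - seq_lens  # pad tokens per request
    return np.concatenate([[0], np.cumsum(padding)[:-1]])

print(build_cum_offsets(np.array([2, 1, 3]), max_len=3))  # [0 1 3]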
@@ -397,7 +397,6 @@ class MTPProposer(Proposer):
             seq_lens_encoder=self.model_inputs["seq_lens_encoder"],
             seq_lens_decoder=self.model_inputs["seq_lens_decoder"],
             seq_lens_this_time=self.model_inputs["seq_lens_this_time"],
-            cum_offsets=self.model_inputs["cum_offsets"],
             batch_id_per_token=self.model_inputs["batch_id_per_token"],
             cu_seqlens_q=self.model_inputs["cu_seqlens_q"],
             cu_seqlens_k=self.model_inputs["cu_seqlens_k"],
@@ -680,7 +680,6 @@ class GPUModelRunner(ModelRunnerBase):
             seq_lens_encoder=self.share_inputs["seq_lens_encoder"],
             seq_lens_decoder=self.share_inputs["seq_lens_decoder"],
             seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
-            cum_offsets=self.share_inputs["cum_offsets"],
             batch_id_per_token=self.share_inputs["batch_id_per_token"],
             cu_seqlens_q=self.share_inputs["cu_seqlens_q"],
             cu_seqlens_k=self.share_inputs["cu_seqlens_k"],
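
Both call sites change the same way: the cum_offsets keyword is dropped and every other argument is untouched. A hypothetical migration sketch for an out-of-tree caller, with a stub standing in for the real ForwardMeta constructor and runner tensors; only fields visible in this diff are used:

def make_forward_meta(**kwargs):
    assert "cum_offsets" not in kwargs  # removed by #2925
    return kwargs

share_inputs = {name: None for name in (
    "seq_lens_encoder", "seq_lens_decoder", "seq_lens_this_time",
    "batch_id_per_token", "cu_seqlens_q", "cu_seqlens_k",
)}

meta = make_forward_meta(
    seq_lens_encoder=share_inputs["seq_lens_encoder"],
    seq_lens_decoder=share_inputs["seq_lens_decoder"],
    seq_lens_this_time=share_inputs["seq_lens_this_time"],
    # cum_offsets=share_inputs["cum_offsets"],  # dropped by this commit
    batch_id_per_token=share_inputs["batch_id_per_token"],
    cu_seqlens_q=share_inputs["cu_seqlens_q"],
    cu_seqlens_k=share_inputs["cu_seqlens_k"],
)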