diff --git a/fastdeploy/model_executor/forward_meta.py b/fastdeploy/model_executor/forward_meta.py
index d6108cffc..2a1d3c56f 100644
--- a/fastdeploy/model_executor/forward_meta.py
+++ b/fastdeploy/model_executor/forward_meta.py
@@ -85,8 +85,6 @@ class ForwardMeta:
     # The sequence length processed in the current step
     seq_lens_this_time: Optional[paddle.Tensor] = None
 
-    # Accumulated offset
-    cum_offsets: Optional[paddle.Tensor] = None
     # batch_id_per_token tensor, used to indicate which token belongs which batch after padding removal to the original input_ids
     batch_id_per_token: Optional[paddle.Tensor] = None
     # Accumulated sequence length of query
@@ -112,7 +110,8 @@ class XPUForwardMeta(ForwardMeta):
     """
     XPUForwardMeta is used to store the global meta information of the forward, and some XPU specific meta info.
    """
-
+    # Accumulated offset
+    cum_offsets: Optional[paddle.Tensor] = None
     # TODO(wanghaitao): Supplementary notes
 
     # encoder_batch_map: Optional[paddle.Tensor] = None
diff --git a/fastdeploy/spec_decode/mtp.py b/fastdeploy/spec_decode/mtp.py
index c2a5d0c4b..c3c559832 100644
--- a/fastdeploy/spec_decode/mtp.py
+++ b/fastdeploy/spec_decode/mtp.py
@@ -397,7 +396,6 @@ class MTPProposer(Proposer):
             seq_lens_encoder=self.model_inputs["seq_lens_encoder"],
             seq_lens_decoder=self.model_inputs["seq_lens_decoder"],
             seq_lens_this_time=self.model_inputs["seq_lens_this_time"],
-            cum_offsets=self.model_inputs["cum_offsets"],
             batch_id_per_token=self.model_inputs["batch_id_per_token"],
             cu_seqlens_q=self.model_inputs["cu_seqlens_q"],
             cu_seqlens_k=self.model_inputs["cu_seqlens_k"],
diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 6ce285081..b3c046e1d 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -680,7 +680,6 @@ class GPUModelRunner(ModelRunnerBase):
             seq_lens_encoder=self.share_inputs["seq_lens_encoder"],
             seq_lens_decoder=self.share_inputs["seq_lens_decoder"],
             seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
-            cum_offsets=self.share_inputs["cum_offsets"],
             batch_id_per_token=self.share_inputs["batch_id_per_token"],
             cu_seqlens_q=self.share_inputs["cu_seqlens_q"],
             cu_seqlens_k=self.share_inputs["cu_seqlens_k"],