diff --git a/fastdeploy/model_executor/pre_and_post_process.py b/fastdeploy/model_executor/pre_and_post_process.py
index 809f7d4f8..eba068e89 100644
--- a/fastdeploy/model_executor/pre_and_post_process.py
+++ b/fastdeploy/model_executor/pre_and_post_process.py
@@ -72,7 +72,6 @@ DISABLE_RECOVER = envs.FD_DISABLED_RECOVER == "1"
 
 
 def pre_process(
-    max_len: int,
     input_ids: paddle.Tensor,
     seq_lens_this_time: int,
     speculative_decoding: bool,
@@ -83,7 +82,6 @@ def pre_process(
     """
     Preprocessing before embedding.
     Args:
-        max_len:
         input_ids:
         seq_lens_this_time:
         speculative_decoding:
@@ -97,6 +95,7 @@ def pre_process(
         cu_seqlens_k:
     """
     # Remove padding
+    max_len = input_ids.shape[1]
     cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
     token_num = paddle.sum(seq_lens_this_time)
     output_padding_offset = None
@@ -490,6 +489,7 @@ def rebuild_padding(
         )
     elif current_platform.is_dcu():
         from fastdeploy.model_executor.ops.gpu import rebuild_padding
+
         hidden_states = rebuild_padding(
             tmp_out,
             cum_offsets,
diff --git a/fastdeploy/spec_decode/mtp.py b/fastdeploy/spec_decode/mtp.py
index 3acf7714d..a4748b2e5 100644
--- a/fastdeploy/spec_decode/mtp.py
+++ b/fastdeploy/spec_decode/mtp.py
@@ -502,7 +502,6 @@ class MTPProposer(Proposer):
             output_cum_offsets,
             output_padding_offset,
         ) = pre_process(
-            self.parallel_config.max_model_len,
             self.model_inputs["input_ids"],
             self.model_inputs["seq_lens_this_time"],
             True,
diff --git a/fastdeploy/worker/gcu_model_runner.py b/fastdeploy/worker/gcu_model_runner.py
index 406ce53d9..751f45432 100644
--- a/fastdeploy/worker/gcu_model_runner.py
+++ b/fastdeploy/worker/gcu_model_runner.py
@@ -449,7 +449,6 @@ class GCUModelRunner(ModelRunnerBase):
             output_cum_offsets,
             output_padding_offset,
         ) = pre_process(
-            self.parallel_config.max_model_len,
             self.share_inputs["input_ids"],
             self.share_inputs["seq_lens_this_time"],
             self.speculative_decoding,
diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 15dc7de77..c639c29ef 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -601,7 +601,6 @@ class GPUModelRunner(ModelRunnerBase):
             output_cum_offsets,
             output_padding_offset,
         ) = pre_process(
-            self.parallel_config.max_model_len,
             self.share_inputs["input_ids"],
             self.share_inputs["seq_lens_this_time"],
             self.speculative_decoding,
diff --git a/fastdeploy/worker/xpu_model_runner.py b/fastdeploy/worker/xpu_model_runner.py
index 601d7f264..731990ff5 100644
--- a/fastdeploy/worker/xpu_model_runner.py
+++ b/fastdeploy/worker/xpu_model_runner.py
@@ -41,7 +41,6 @@ logger = get_logger("xpu_model_runner", "xpu_model_runner.log")
 
 
 def xpu_pre_process(
-    max_len: int,
     input_ids: paddle.Tensor,
     seq_lens_this_time: int,
     share_inputs: Dict,
@@ -51,6 +50,7 @@ def xpu_pre_process(
     seq_lens_decoder: Optional[paddle.Tensor] = None,
 ) -> XPUForwardMeta:
     """ """
+    max_len = input_ids.shape[1]
     cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
     token_num = paddle.sum(seq_lens_this_time)
     from fastdeploy.model_executor.ops.xpu import (
@@ -458,7 +458,6 @@ class XPUModelRunner(ModelRunnerBase):
     def _prepare_inputs(self) -> None:
         """prepare the model inputs"""
         self.forward_meta = xpu_pre_process(
-            self.parallel_config.max_model_len,
             self.share_inputs["input_ids"],
             self.share_inputs["seq_lens_this_time"],
             self.share_inputs,
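
For illustration, here is a minimal standalone sketch of the padding-removal arithmetic this patch touches. It is not FastDeploy code; the batch shape and sequence lengths are hypothetical. The one behavioral point it demonstrates is the substitution made across all five files: max_len is now derived from the actual padded width of input_ids rather than passed in as parallel_config.max_model_len.

# Toy reproduction of the offsets computed in pre_process / xpu_pre_process.
# Batch values are made up for illustration.
import paddle

# Hypothetical batch: 3 sequences padded to a common width of 6 tokens.
input_ids = paddle.zeros([3, 6], dtype="int64")
seq_lens_this_time = paddle.to_tensor([4, 1, 6], dtype="int32")

max_len = input_ids.shape[1]  # 6; previously parallel_config.max_model_len
cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)  # padding tokens skipped so far
token_num = paddle.sum(seq_lens_this_time)  # real token count after un-padding

print(cum_offsets_now.numpy())  # [2 7 7]
print(token_num.numpy())        # 11

Taking max_len from input_ids.shape[1] keeps the cumulative offsets consistent with the tensor's actual padded width, whatever that width happens to be, instead of assuming sequences are padded out to the configured model maximum.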