delete max-len (#2959)

commit 9b22b8d2c3 (parent 5b59a97030)
Author: lizexu123
Date:   2025-07-23 15:11:39 +08:00
Committed by GitHub
5 changed files with 3 additions and 7 deletions
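
This change removes the explicit max_len parameter from pre_process and xpu_pre_process: both now derive the padded width from input_ids.shape[1], and the four call sites (the MTP proposer and the GCU/GPU/XPU model runners) no longer thread self.parallel_config.max_model_len through.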

@@ -72,7 +72,6 @@ DISABLE_RECOVER = envs.FD_DISABLED_RECOVER == "1"
 
 
 def pre_process(
-    max_len: int,
     input_ids: paddle.Tensor,
     seq_lens_this_time: int,
     speculative_decoding: bool,
@@ -83,7 +82,6 @@ def pre_process(
     """
     Preprocessing before embedding.
     Args:
-        max_len:
         input_ids:
         seq_lens_this_time:
         speculative_decoding:
@@ -97,6 +95,7 @@ def pre_process(
         cu_seqlens_k:
     """
     # Remove padding
+    max_len = input_ids.shape[1]
     cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
     token_num = paddle.sum(seq_lens_this_time)
     output_padding_offset = None
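
For intuition, a minimal sketch of this arithmetic with toy values (not from the repository), assuming the padded [batch_size, max_len] layout that pre_process receives:

import paddle

# Three sequences of real lengths 3, 2 and 4, right-padded to width 5.
input_ids = paddle.to_tensor([
    [1, 2, 3, 0, 0],
    [4, 5, 0, 0, 0],
    [6, 7, 8, 9, 0],
])
seq_lens_this_time = paddle.to_tensor([3, 2, 4])

# After this commit, the padded width comes from the tensor itself
# instead of a separately threaded max_model_len argument.
max_len = input_ids.shape[1]                                   # 5
cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)  # [2, 5, 6]
token_num = paddle.sum(seq_lens_this_time)                     # 9 real tokens

The offsets therefore track the batch's actual padded width rather than the configured maximum model length.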
@@ -490,6 +489,7 @@ def rebuild_padding(
         )
     elif current_platform.is_dcu():
         from fastdeploy.model_executor.ops.gpu import rebuild_padding
+
         hidden_states = rebuild_padding(
             tmp_out,
             cum_offsets,

@@ -502,7 +502,6 @@ class MTPProposer(Proposer):
             output_cum_offsets,
             output_padding_offset,
         ) = pre_process(
-            self.parallel_config.max_model_len,
             self.model_inputs["input_ids"],
             self.model_inputs["seq_lens_this_time"],
             True,
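
The same argument drop repeats at every call site; schematically (other arguments unchanged):

# before: pre_process(self.parallel_config.max_model_len, input_ids, seq_lens_this_time, ...)
# after:  pre_process(input_ids, seq_lens_this_time, ...)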

@@ -449,7 +449,6 @@ class GCUModelRunner(ModelRunnerBase):
             output_cum_offsets,
             output_padding_offset,
         ) = pre_process(
-            self.parallel_config.max_model_len,
             self.share_inputs["input_ids"],
             self.share_inputs["seq_lens_this_time"],
             self.speculative_decoding,

@@ -601,7 +601,6 @@ class GPUModelRunner(ModelRunnerBase):
             output_cum_offsets,
             output_padding_offset,
         ) = pre_process(
-            self.parallel_config.max_model_len,
             self.share_inputs["input_ids"],
             self.share_inputs["seq_lens_this_time"],
             self.speculative_decoding,

@@ -41,7 +41,6 @@ logger = get_logger("xpu_model_runner", "xpu_model_runner.log")
 
 
 def xpu_pre_process(
-    max_len: int,
     input_ids: paddle.Tensor,
     seq_lens_this_time: int,
     share_inputs: Dict,
@@ -51,6 +50,7 @@ def xpu_pre_process(
     seq_lens_decoder: Optional[paddle.Tensor] = None,
 ) -> XPUForwardMeta:
     """ """
+    max_len = input_ids.shape[1]
     cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
     token_num = paddle.sum(seq_lens_this_time)
     from fastdeploy.model_executor.ops.xpu import (
@@ -458,7 +458,6 @@ class XPUModelRunner(ModelRunnerBase):
     def _prepare_inputs(self) -> None:
         """prepare the model inputs"""
         self.forward_meta = xpu_pre_process(
-            self.parallel_config.max_model_len,
             self.share_inputs["input_ids"],
             self.share_inputs["seq_lens_this_time"],
             self.share_inputs,