Revert "[BugFix] num_seqs (#3291)" (#3316)

This reverts commit e0aeac58e1.
This commit is contained in:
Jiang-Jia-Jun
2025-08-11 16:16:51 +08:00
committed by GitHub
parent 9571c458f0
commit c56c99837a
3 changed files with 3 additions and 6 deletions

View File

@@ -799,7 +799,7 @@ class GPUModelRunner(ModelRunnerBase):
output_padding_offset,
) = pre_process(
self.share_inputs["input_ids"],
-getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
+self.share_inputs["seq_lens_this_time"],
self.speculative_decoding,
(self.share_inputs["draft_tokens"] if self.speculative_decoding else None),
self.share_inputs["seq_lens_encoder"],
@@ -884,7 +884,7 @@ class GPUModelRunner(ModelRunnerBase):
max_len_tensor_cpu=self.share_inputs["max_len_tensor_cpu"],
seq_lens_encoder=self.share_inputs["seq_lens_encoder"],
seq_lens_decoder=self.share_inputs["seq_lens_decoder"],
-seq_lens_this_time=getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
+seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
batch_id_per_token=self.share_inputs["batch_id_per_token"],
cu_seqlens_q=self.share_inputs["cu_seqlens_q"],
cu_seqlens_k=self.share_inputs["cu_seqlens_k"],