[MTP] Optimize MTP inference speed (#2840)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

This commit is contained in:
freeliuzc
2025-07-14 19:50:22 +08:00
committed by GitHub
parent 4c7b8bc458
commit 7cdd8d290d
6 changed files with 253 additions and 24 deletions

View File

@@ -101,6 +101,8 @@ def pre_process(
seq_lens_encoder,
seq_lens_decoder,
)
if isinstance(seq_lens_output, list):
seq_lens_output = seq_lens_output[0]
output_token_num = paddle.sum(seq_lens_output)
output_cum_offsets_tmp = paddle.cumsum(max_len - seq_lens_output)
output_padding_offset, output_cum_offsets = speculate_get_output_padding_offset(