[Speculative Decoding][MTP] Support stop_seqs and pd-split mode (#5029)

* support multi_stop_seqs in speculative decoding

* support mtp tp with ep split

* fix custom op registration

* fix spec stop_seqs params
This commit is contained in:
freeliuzc
2025-11-20 15:26:01 +08:00
committed by GitHub
parent 3e3558f492
commit f1e36ff2f7
3 changed files with 143 additions and 128 deletions

View File

@@ -79,6 +79,7 @@ else:
speculate_step_paddle,
speculate_step_system_cache,
speculate_update,
speculate_set_stop_value_multi_seqs,
step_paddle,
step_system_cache,
update_inputs,
@@ -467,7 +468,17 @@ def post_process_specualate(
think_end_id=think_end_id,
line_break_id=line_break_id,
)
speculate_set_stop_value_multi_seqs(
model_output.accept_tokens,
model_output.accept_num,
model_output.pre_ids,
model_output.step_idx,
model_output.stop_flags,
model_output.seq_lens_this_time,
model_output.stop_token_ids,
model_output.stop_seqs_len,
model_output.eos_token_id,
)
speculate_update(
model_output.seq_lens_encoder,
model_output.seq_lens_decoder,