mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Speculative Decoding][MTP] Support stop_seqs and pd-split mode (#5029)
* support multi_stop_seqs in speculative decoding
* support mtp tp with ep split
* fix custom op register
* fix spec stop_seqs params
This commit is contained in:
@@ -79,6 +79,7 @@ else:
|
||||
speculate_step_paddle,
|
||||
speculate_step_system_cache,
|
||||
speculate_update,
|
||||
speculate_set_stop_value_multi_seqs,
|
||||
step_paddle,
|
||||
step_system_cache,
|
||||
update_inputs,
|
||||
@@ -467,7 +468,17 @@ def post_process_specualate(
|
||||
think_end_id=think_end_id,
|
||||
line_break_id=line_break_id,
|
||||
)
|
||||
|
||||
speculate_set_stop_value_multi_seqs(
|
||||
model_output.accept_tokens,
|
||||
model_output.accept_num,
|
||||
model_output.pre_ids,
|
||||
model_output.step_idx,
|
||||
model_output.stop_flags,
|
||||
model_output.seq_lens_this_time,
|
||||
model_output.stop_token_ids,
|
||||
model_output.stop_seqs_len,
|
||||
model_output.eos_token_id,
|
||||
)
|
||||
speculate_update(
|
||||
model_output.seq_lens_encoder,
|
||||
model_output.seq_lens_decoder,
|
||||
|
||||
Reference in New Issue
Block a user