[Feature][MTP] Support new MTP (#3656)

* update multi-draft-token strategy

* fix format

* support hybrid MTP with n-gram speculative decoding method
This commit is contained in:
freeliuzc
2025-08-27 19:38:26 +08:00
committed by GitHub
parent 62659a7a73
commit c753f1fc9e
20 changed files with 501 additions and 579 deletions

View File

@@ -59,7 +59,7 @@ else:
speculate_set_value_by_flags_and_idx,
speculate_step_paddle,
speculate_step_system_cache,
-speculate_update_v3,
+speculate_update,
step_paddle,
step_system_cache,
update_inputs,
@@ -288,7 +288,7 @@ def post_process_normal(
def post_process_specualate(model_output, save_each_rank: bool = False, skip_save_output: bool = False):
""""""
-speculate_update_v3(
+speculate_update(
model_output.seq_lens_encoder,
model_output.seq_lens_decoder,
model_output.not_need_stop,