[MTP] Support MTP chunk_prefill_v1 (#4366)

* support mtp chunk_prefill_v1

* fix mtp chunk_prefill output, fix unit test

* fix unit test

* fix save_output
This commit is contained in:
freeliuzc
2025-10-15 13:21:32 +08:00
committed by GitHub
parent ffe7af8a97
commit 582aebd48b
11 changed files with 118 additions and 58 deletions

View File

@@ -709,8 +709,11 @@ void SpeculateSetValueByFlagsAndIdx(const paddle::Tensor &pre_ids_all,
void SpeculateSaveWithOutputMsgStatic(const paddle::Tensor& accept_tokens,
const paddle::Tensor& accept_num,
const paddle::Tensor& not_need_stop,
const paddle::Tensor& seq_lens_decoder,
const paddle::Tensor& prompt_lens,
int64_t rank_id,
bool save_each_rank);
bool save_each_rank,
bool skip_prefill);
void SpeculateClearAcceptNums(const paddle::Tensor& accept_num,
@@ -719,7 +722,9 @@ void SpeculateClearAcceptNums(const paddle::Tensor& accept_num,
void SpeculateScheduleCache(const paddle::Tensor &draft_tokens,
const paddle::Tensor &block_tables,
const paddle::Tensor &stop_flags,
const paddle::Tensor &prompt_lens,
const paddle::Tensor &seq_lens_this_time,
const paddle::Tensor &seq_lens_encoder,
const paddle::Tensor &seq_lens_decoder,
const paddle::Tensor &step_seq_lens_decoder,
const paddle::Tensor &step_draft_tokens,