[MTP] Support MTP chunk_prefill_v1 (#4366)

* support mtp chunk_prefill_v1
* fix mtp chunk_prefill output, fix unit test
* fix unit test
* fix save_output
2025-12-24 13:28:13 +08:00 · 2025-10-15 13:21:32 +08:00
parent ffe7af8a97
commit 582aebd48b
11 changed files with 118 additions and 58 deletions
--- a/custom_ops/gpu_ops/cpp_extensions.cc
+++ b/custom_ops/gpu_ops/cpp_extensions.cc
@@ -709,8 +709,11 @@ void SpeculateSetValueByFlagsAndIdx(const paddle::Tensor &pre_ids_all,
 void SpeculateSaveWithOutputMsgStatic(const paddle::Tensor& accept_tokens,
                                      const paddle::Tensor& accept_num,
                                      const paddle::Tensor& not_need_stop,
+                                      const paddle::Tensor& seq_lens_decoder,
+                                      const paddle::Tensor& prompt_lens,
                                      int64_t rank_id,
-                                      bool save_each_rank);
+                                      bool save_each_rank,
+                                      bool skip_prefill);


 void SpeculateClearAcceptNums(const paddle::Tensor& accept_num,
@@ -719,7 +722,9 @@ void SpeculateClearAcceptNums(const paddle::Tensor& accept_num,
 void SpeculateScheduleCache(const paddle::Tensor &draft_tokens,
                            const paddle::Tensor &block_tables,
                            const paddle::Tensor &stop_flags,
+                            const paddle::Tensor &prompt_lens,
                            const paddle::Tensor &seq_lens_this_time,
+                            const paddle::Tensor &seq_lens_encoder,
                            const paddle::Tensor &seq_lens_decoder,
                            const paddle::Tensor &step_seq_lens_decoder,
                            const paddle::Tensor &step_draft_tokens,