[MTP] Support MTP chunk_prefill_v1 (#4366)

* support mtp chunk_prefill_v1

* fix mtp chunk_prefill output, fix unit test

* fix unit test

* fix save_output
This commit is contained in:
freeliuzc
2025-10-15 13:21:32 +08:00
committed by GitHub
parent ffe7af8a97
commit 582aebd48b
11 changed files with 118 additions and 58 deletions

View File

@@ -1026,11 +1026,7 @@ class EngineArgs:
         speculative_cfg = self.create_speculative_config()
         if not self.enable_chunked_prefill:
-            if (
-                current_platform.is_cuda()
-                and self.splitwise_role == "mixed"
-                and (speculative_cfg is None or speculative_cfg.method not in ["mtp"])
-            ):
+            if current_platform.is_cuda() and self.splitwise_role == "mixed":
                 # default enable chunked prefill
                 self.enable_chunked_prefill = True