[MTP] Support MTP chunk_prefill_v1 (#4366)

* Support MTP chunk_prefill_v1
* Fix MTP chunked-prefill output; fix unit test
* Fix unit test
* Fix save_output
2025-12-24 13:28:13 +08:00 · 2025-10-15 13:21:32 +08:00
parent ffe7af8a97
commit 582aebd48b
11 changed files with 118 additions and 58 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -1026,11 +1026,7 @@ class EngineArgs:

        speculative_cfg = self.create_speculative_config()
        if not self.enable_chunked_prefill:
-            if (
-                current_platform.is_cuda()
-                and self.splitwise_role == "mixed"
-                and (speculative_cfg is None or speculative_cfg.method not in ["mtp"])
-            ):
+            if current_platform.is_cuda() and self.splitwise_role == "mixed":
                # default enable chunked prefill
                self.enable_chunked_prefill = True