mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[MTP]support mtp chunk_prefill_v1 (#4366)
* support mtp chunk_prefill_v1
* fix mtp chunkprefill output, fix unit test
* fix unit test
* fix save_output
This commit is contained in:
@@ -1026,11 +1026,7 @@ class EngineArgs:
 
         speculative_cfg = self.create_speculative_config()
         if not self.enable_chunked_prefill:
-            if (
-                current_platform.is_cuda()
-                and self.splitwise_role == "mixed"
-                and (speculative_cfg is None or speculative_cfg.method not in ["mtp"])
-            ):
+            if current_platform.is_cuda() and self.splitwise_role == "mixed":
                 # default enable chunked prefill
                 self.enable_chunked_prefill = True
 
Reference in New Issue
Block a user