mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Speculative Decoding][MTP]Support mtp in epdptp mode (#4614)
* support mtp many features * support mtp reshard in rl mode * fix function * support mtp ep * support mtp in hybird-dp-tp mode * default open scheduler_v1 in mtp
This commit is contained in:
@@ -442,8 +442,7 @@ class EngineArgs:
|
||||
raise NotImplementedError("Only CUDA platform supports logprob.")
|
||||
if self.speculative_config is not None and self.logprobs_mode.startswith("processed"):
|
||||
raise NotImplementedError("processed_logprobs not support in speculative.")
|
||||
if self.speculative_config is not None:
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
|
||||
if self.splitwise_role != "mixed" and self.cache_transfer_protocol != "rdma":
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
if not current_platform.is_cuda() and not current_platform.is_xpu():
|
||||
|
||||
Reference in New Issue
Block a user