[Speculative Decoding][MTP]Support mtp in epdptp mode (#4614)

* support mtp many features * support mtp reshard in rl mode * fix function * support mtp ep * support mtp in hybrid-dp-tp mode * default open scheduler_v1 in mtp
2025-12-24 13:28:13 +08:00 · 2025-10-28 16:02:47 +08:00
parent b4014834a9
commit c63361fd1d
10 changed files with 124 additions and 74 deletions
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -778,13 +778,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
    logger.info(f"- Dynamic load weight: {load_config.dynamic_load_weight}")
    logger.info(f"- Load strategy: {load_config.load_strategy}")

-    if (
-        args.speculative_config is not None
-        and ("method" in args.speculative_config)
-        and (args.speculative_config["method"] is not None)
-    ):
-        logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not support speculative decoding now.")
-        envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
    if args.splitwise_role != "mixed" and args.cache_transfer_protocol != "rdma":
        envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
    if not current_platform.is_cuda() and not current_platform.is_xpu():