From 8567ada09e23aedd67decccd23ba71e030a64a66 Mon Sep 17 00:00:00 2001 From: chenjian <1435317881@qq.com> Date: Thu, 4 Sep 2025 20:54:55 +0800 Subject: [PATCH] [Fix] disable scheduler v1 in guided decoding (#3877) * disable scheduler v1 in guided decoding * disable scheduler v1 in guided decoding --- fastdeploy/engine/args_utils.py | 2 ++ fastdeploy/worker/worker_process.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index c6811e351..306423aaf 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -398,6 +398,8 @@ class EngineArgs: envs.ENABLE_V1_KVCACHE_SCHEDULER = 0 if not current_platform.is_cuda(): envs.ENABLE_V1_KVCACHE_SCHEDULER = 0 + if self.guided_decoding_backend != "off": + envs.ENABLE_V1_KVCACHE_SCHEDULER = 0 @staticmethod def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py index 8ba805023..8a0ff6f09 100644 --- a/fastdeploy/worker/worker_process.py +++ b/fastdeploy/worker/worker_process.py @@ -755,6 +755,9 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig: if not current_platform.is_cuda(): logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not supported.") envs.ENABLE_V1_KVCACHE_SCHEDULER = 0 + if parallel_config.guided_decoding_backend != "off": + logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not supported guided_decoding.") + envs.ENABLE_V1_KVCACHE_SCHEDULER = 0 fd_config = FDConfig( model_config=model_config,