[Feature] Enable guided decoding with ENABLE_V1_KVCACHE_SCHEDULER = 1 (#5140)

* Enable guided decoding with ENABLE_V1_KVCACHE_SCHEDULER = 1

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-12-24 13:28:13 +08:00 · 2025-11-26 10:22:35 +08:00
parent 2d787590c4
commit f25ee3a26f
3 changed files with 38 additions and 5 deletions
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -968,9 +968,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
    if not (current_platform.is_cuda() or current_platform.is_xpu() or current_platform.is_maca()):
        logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not supported.")
        envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
-    if structured_outputs_config.guided_decoding_backend != "off":
-        logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not supported guided_decoding.")
-        envs.ENABLE_V1_KVCACHE_SCHEDULER = 0

    if envs.ENABLE_V1_KVCACHE_SCHEDULER and args.splitwise_role == "prefill":
        os.environ["PREFILL_NODE_ONE_STEP_STOP_V1"] = "1"