[FDConfig]Turn on the CUDAGraph + RL switch (#4508)

* Turn on the CUDAGraph + RL switch

* Reduce max_num_seqs and the number of requests
This commit is contained in:
RAM
2025-10-23 11:08:07 +08:00
committed by GitHub
parent 918e4e9850
commit 8a02ab43a8
3 changed files with 5 additions and 6 deletions

View File

@@ -1510,9 +1510,7 @@ class FDConfig:
self.structured_outputs_config.guided_decoding_backend = "xgrammar"
# Adjustment GraphOptConfig
if (self.scheduler_config.splitwise_role != "mixed") or (
self.load_config is not None and self.load_config.dynamic_load_weight is True
):
if self.scheduler_config.splitwise_role != "mixed":
self.graph_opt_config.use_cudagraph = False
logger.info(
"CUDAGraph cannot currently be enabled together with PD Disaggregation, so it has been disabled automatically."
@@ -1630,11 +1628,12 @@ class FDConfig:
self.scheduler_config.check()
# Check graph optimization config
if self.graph_opt_config.graph_opt_level > 0 or self.graph_opt_config.use_cudagraph:
if self.graph_opt_config.graph_opt_level > 0:
if self.load_config is not None:
assert (
self.load_config.dynamic_load_weight is False
), "Static graph cannot be used in RL scene temporarily"
if int(envs.ENABLE_V1_KVCACHE_SCHEDULER) == 1:
assert (
int(envs.FD_DISABLED_RECOVER) == 0

View File

@@ -38,7 +38,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
--cache-queue-port ${FD_CACHE_QUEUE_PORT} \
--quantization wint8 \
--max-model-len 32768 \
--max-num-seqs 256 \
--max-num-seqs 1 \
--gpu-memory-utilization 0.9 \
--model "$MODEL_PATH" \
--load-strategy ipc_snapshot \

View File

@@ -12,7 +12,7 @@ PORT="${FD_API_PORT}" # 这里需要配合启动脚本那个URL PORT
BASE_URL="http://$HOST:$PORT"
TOTAL_ROUNDS=30
CHAT_REQUESTS_PER_ROUND=5
CHAT_REQUESTS_PER_ROUND=1
export CUDA_VISIBLE_DEVICES=0,1
MAX_MEMORY_MB=10240 # 10GB