From 8a02ab43a8cb8df242b3eef965b145b9e7e42fae Mon Sep 17 00:00:00 2001 From: RAM Date: Thu, 23 Oct 2025 11:08:07 +0800 Subject: [PATCH] [FDConfig]Turn on the CUDAGraph + RL switch (#4508) * Turn on the CUDAGraph + RL switch * reduce max_num_seqs and number of request --- fastdeploy/config.py | 7 +++---- tests/ce/stable_cases/launch_model.sh | 2 +- tests/ce/stable_cases/run.sh | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index e55db280b..03e9c2e68 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1510,9 +1510,7 @@ class FDConfig: self.structured_outputs_config.guided_decoding_backend = "xgrammar" # Adjustment GraphOptConfig - if (self.scheduler_config.splitwise_role != "mixed") or ( - self.load_config is not None and self.load_config.dynamic_load_weight is True - ): + if self.scheduler_config.splitwise_role != "mixed": self.graph_opt_config.use_cudagraph = False logger.info( "CUDAGraph does not support to be started together with PD Disaggregation temporarily, but has been automatically closed!" @@ -1630,11 +1628,12 @@ class FDConfig: self.scheduler_config.check() # Check graph optimization config - if self.graph_opt_config.graph_opt_level > 0 or self.graph_opt_config.use_cudagraph: + if self.graph_opt_config.graph_opt_level > 0: if self.load_config is not None: assert ( self.load_config.dynamic_load_weight is False ), "Static graph cannot be used in RL scene temporarily" + if int(envs.ENABLE_V1_KVCACHE_SCHEDULER) == 1: assert ( int(envs.FD_DISABLED_RECOVER) == 0 diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh index 1850dc944..3b758a15a 100644 --- a/tests/ce/stable_cases/launch_model.sh +++ b/tests/ce/stable_cases/launch_model.sh @@ -38,7 +38,7 @@ python -m fastdeploy.entrypoints.openai.api_server \ --cache-queue-port ${FD_CACHE_QUEUE_PORT} \ --quantization wint8 \ --max-model-len 32768 \ - --max-num-seqs 256 \ + --max-num-seqs 1 \ --gpu-memory-utilization 0.9 \ --model "$MODEL_PATH" \ --load-strategy ipc_snapshot \ diff --git a/tests/ce/stable_cases/run.sh b/tests/ce/stable_cases/run.sh index 6b7f939bb..81197253b 100644 --- a/tests/ce/stable_cases/run.sh +++ b/tests/ce/stable_cases/run.sh @@ -12,7 +12,7 @@ PORT="${FD_API_PORT}" # 这里需要配合启动脚本那个URL PORT BASE_URL="http://$HOST:$PORT" TOTAL_ROUNDS=30 -CHAT_REQUESTS_PER_ROUND=5 +CHAT_REQUESTS_PER_ROUND=1 export CUDA_VISIBLE_DEVICES=0,1 MAX_MEMORY_MB=10240 # 10GB