[FDConfig] Turn on the CUDAGraph + RL switch (#4508)

* Turn on the CUDAGraph + RL switch

* Reduce max_num_seqs and the number of requests
This commit is contained in:
RAM
2025-10-23 11:08:07 +08:00
committed by GitHub
parent 918e4e9850
commit 8a02ab43a8
3 changed files with 5 additions and 6 deletions

View File

@@ -1510,9 +1510,7 @@ class FDConfig:
self.structured_outputs_config.guided_decoding_backend = "xgrammar"
# Adjustment GraphOptConfig
if (self.scheduler_config.splitwise_role != "mixed") or (
self.load_config is not None and self.load_config.dynamic_load_weight is True
):
if self.scheduler_config.splitwise_role != "mixed":
self.graph_opt_config.use_cudagraph = False
logger.info(
"CUDAGraph does not support to be started together with PD Disaggregation temporarily, but has been automatically closed!"
@@ -1630,11 +1628,12 @@ class FDConfig:
self.scheduler_config.check()
# Check graph optimization config
if self.graph_opt_config.graph_opt_level > 0 or self.graph_opt_config.use_cudagraph:
if self.graph_opt_config.graph_opt_level > 0:
if self.load_config is not None:
assert (
self.load_config.dynamic_load_weight is False
), "Static graph cannot be used in RL scene temporarily"
if int(envs.ENABLE_V1_KVCACHE_SCHEDULER) == 1:
assert (
int(envs.FD_DISABLED_RECOVER) == 0