[FDConfig] Turn on the CUDAGraph + Speculative Decoding switch (#4511)

This commit is contained in:
RAM
2025-10-21 18:34:16 +08:00
committed by GitHub
parent 153f15db39
commit 7cbe6b2472

View File

@@ -1511,14 +1511,13 @@ class FDConfig:
# Adjustment GraphOptConfig
if (
(self.speculative_config is not None and self.speculative_config.method is not None)
(self.scheduler_config.splitwise_role != "mixed")
or (self.model_config is not None and self.model_config.enable_mm is True)
or (self.load_config is not None and self.load_config.dynamic_load_weight is True)
or (self.scheduler_config.splitwise_role != "mixed")
):
self.graph_opt_config.use_cudagraph = False
logger.info(
"CUDAGraph does not support to be started together with SpeculativeDecode and MultiModel temporarily, but has been automatically closed!"
"CUDAGraph does not support to be started together with MultiModel temporarily, but has been automatically closed!"
)
if self.load_config is not None and self.load_config.dynamic_load_weight is True:
self.graph_opt_config.graph_opt_level = 0
@@ -1634,10 +1633,6 @@ class FDConfig:
# Check graph optimization config
if self.graph_opt_config.use_cudagraph:
if self.speculative_config is not None:
assert (
self.speculative_config.method is None
), "CUDAGraph does not support the simultaneous use of Speculative Decoding"
if self.model_config is not None:
assert (
self.model_config.enable_mm is not True