diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 738f563ef..1d70da38b 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1511,14 +1511,13 @@ class FDConfig: # Adjustment GraphOptConfig if ( - (self.speculative_config is not None and self.speculative_config.method is not None) + (self.scheduler_config.splitwise_role != "mixed") or (self.model_config is not None and self.model_config.enable_mm is True) or (self.load_config is not None and self.load_config.dynamic_load_weight is True) - or (self.scheduler_config.splitwise_role != "mixed") ): self.graph_opt_config.use_cudagraph = False logger.info( - "CUDAGraph does not support to be started together with SpeculativeDecode and MultiModel temporarily, but has been automatically closed!" + "CUDAGraph does not support to be started together with MultiModel temporarily, but has been automatically closed!" ) if self.load_config is not None and self.load_config.dynamic_load_weight is True: self.graph_opt_config.graph_opt_level = 0 @@ -1634,10 +1633,6 @@ class FDConfig: # Check graph optimization config if self.graph_opt_config.use_cudagraph: - if self.speculative_config is not None: - assert ( - self.speculative_config.method is None - ), "CUDAGraph does not support the simultaneous use of Speculative Decoding" if self.model_config is not None: assert ( self.model_config.enable_mm is not True