mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[FDConfig] Turn on the CUDAGraph + Speculative Decoding switch (#4511)
This commit is contained in:
@@ -1511,14 +1511,13 @@ class FDConfig:
 
         # Adjustment GraphOptConfig
         if (
-            (self.speculative_config is not None and self.speculative_config.method is not None)
+            (self.scheduler_config.splitwise_role != "mixed")
             or (self.model_config is not None and self.model_config.enable_mm is True)
             or (self.load_config is not None and self.load_config.dynamic_load_weight is True)
-            or (self.scheduler_config.splitwise_role != "mixed")
         ):
             self.graph_opt_config.use_cudagraph = False
             logger.info(
-                "CUDAGraph does not support to be started together with SpeculativeDecode and MultiModel temporarily, but has been automatically closed!"
+                "CUDAGraph does not support to be started together with MultiModel temporarily, but has been automatically closed!"
             )
         if self.load_config is not None and self.load_config.dynamic_load_weight is True:
             self.graph_opt_config.graph_opt_level = 0
@@ -1634,10 +1633,6 @@ class FDConfig:
 
         # Check graph optimization config
         if self.graph_opt_config.use_cudagraph:
-            if self.speculative_config is not None:
-                assert (
-                    self.speculative_config.method is None
-                ), "CUDAGraph does not support the simultaneous use of Speculative Decoding"
             if self.model_config is not None:
                 assert (
                     self.model_config.enable_mm is not True
Reference in New Issue
Block a user