diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index c0b4e3a01..a53658963 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -429,8 +429,6 @@ class EngineArgs: self.tokenizer = self.model if self.splitwise_role == "decode": self.enable_prefix_caching = False - if self.speculative_config is not None: - self.enable_prefix_caching = False if not current_platform.is_cuda(): self.enable_prefix_caching = False if self.dynamic_load_weight: