From 74ba637b6beb3d3ae3ac2efd23fa9739adc770a1 Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 3 Dec 2025 20:59:32 +0800 Subject: [PATCH] remove close prefix cache (#5363) --- fastdeploy/engine/args_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index c0b4e3a01..a53658963 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -429,8 +429,6 @@ class EngineArgs: self.tokenizer = self.model if self.splitwise_role == "decode": self.enable_prefix_caching = False - if self.speculative_config is not None: - self.enable_prefix_caching = False if not current_platform.is_cuda(): self.enable_prefix_caching = False if self.dynamic_load_weight: