diff --git a/fastdeploy/config.py b/fastdeploy/config.py index aec08f00c..fd62598e0 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1730,6 +1730,8 @@ class FDConfig: f"set to max_num_batched_tokens." ) self.cache_config.max_encoder_cache = self.scheduler_config.max_num_batched_tokens + # TODO: mm encoder_cache close for now + self.cache_config.max_encoder_cache = 0 else: self.cache_config.max_encoder_cache = 0