diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index 0bf1cdf39..dc13c74f0 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -527,8 +527,14 @@ class EngineSevice: self.cfg.max_prefill_batch, ) + if self.cfg.model_config.enable_mm: + self.resource_manager.check_and_free_block_tables() + available_blocks = self.resource_manager.available_block_num() + else: + available_blocks = self.cfg.cache_config.max_block_num_per_seq + tasks = self.scheduler.get_requests( - available_blocks=self.cfg.cache_config.max_block_num_per_seq, + available_blocks=available_blocks, block_size=self.cfg.cache_config.block_size, reserved_output_blocks=self.cfg.cache_config.enc_dec_block_num, max_num_batched_tokens=self.cfg.max_model_len,