From d6bf6de5e651faa553913f14fbc72a37b250271f Mon Sep 17 00:00:00 2001 From: ming1753 <61511741+ming1753@users.noreply.github.com> Date: Mon, 8 Sep 2025 00:32:22 +0800 Subject: [PATCH] [Bug Fix] Fix mm performance degradation (#3942) * [Bug Fix] Fix mm performance degradation * format --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> Co-authored-by: chenjian <1435317881@qq.com> --- fastdeploy/engine/common_engine.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index 0bf1cdf39..dc13c74f0 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -527,8 +527,14 @@ class EngineSevice: self.cfg.max_prefill_batch, ) + if self.cfg.model_config.enable_mm: + self.resource_manager.check_and_free_block_tables() + available_blocks = self.resource_manager.available_block_num() + else: + available_blocks = self.cfg.cache_config.max_block_num_per_seq + tasks = self.scheduler.get_requests( - available_blocks=self.cfg.cache_config.max_block_num_per_seq, + available_blocks=available_blocks, block_size=self.cfg.cache_config.block_size, reserved_output_blocks=self.cfg.cache_config.enc_dec_block_num, max_num_batched_tokens=self.cfg.max_model_len,