From 69b4d058adfb968e8e5608adcbab061e3c35a42b Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 27 Nov 2025 15:15:49 +0800 Subject: [PATCH] cp_fix_bug (#5253) --- fastdeploy/config.py | 2 +- fastdeploy/engine/common_engine.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 8d9aa9ab0..57fae20c1 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1572,7 +1572,7 @@ class FDConfig: self.max_prefill_batch = int(os.getenv("MAX_PREFILL_NUM", "3")) if current_platform.is_xpu(): self.max_prefill_batch = 1 - if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER: + if self.model_config is not None and self.model_config.enable_mm: self.max_prefill_batch = 1 # TODO:当前多模prefill阶段只支持并行度为1,待优化 else: self.max_prefill_batch = self.scheduler_config.max_num_seqs diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index aefe43037..b7a96489f 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -691,8 +691,16 @@ class EngineService: else: max_num_batched_tokens = self.cfg.model_config.max_model_len + # In multimodal scenarios, use available_block_num when pulling requests to prevent + # frequent rescheduling caused by insufficient blocks + if self.cfg.model_config.enable_mm: + self.resource_manager.check_and_free_block_tables() + available_blocks = self.resource_manager.available_block_num() + else: + available_blocks = self.cfg.cache_config.max_block_num_per_seq + tasks = self.scheduler.get_requests( - available_blocks=self.cfg.cache_config.max_block_num_per_seq, + available_blocks=available_blocks, block_size=self.cfg.cache_config.block_size, reserved_output_blocks=self.cfg.cache_config.enc_dec_block_num, max_num_batched_tokens=max_num_batched_tokens,