From 35846909c76aabe8ff9370f23ebff26fa47e4c00 Mon Sep 17 00:00:00 2001 From: Yonghua Li <39643373+liyonghua0910@users.noreply.github.com> Date: Fri, 5 Dec 2025 18:23:42 +0800 Subject: [PATCH] [fix] fix scheduler hang when input length is very close to max_model_len (#5393) --- fastdeploy/engine/common_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index 51419bde9..5e152f746 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -701,7 +701,7 @@ class EngineService: tasks = self.scheduler.get_requests( available_blocks=available_blocks, block_size=self.cfg.cache_config.block_size, - reserved_output_blocks=self.cfg.cache_config.enc_dec_block_num, + reserved_output_blocks=0, # self.cfg.cache_config.enc_dec_block_num max_num_batched_tokens=max_num_batched_tokens, batch=num_prefill_batch, )