mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
cp_fix_bug (#5253)
This commit is contained in:
@@ -1572,7 +1572,7 @@ class FDConfig:
|
||||
self.max_prefill_batch = int(os.getenv("MAX_PREFILL_NUM", "3"))
|
||||
if current_platform.is_xpu():
|
||||
self.max_prefill_batch = 1
|
||||
if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
if self.model_config is not None and self.model_config.enable_mm:
|
||||
self.max_prefill_batch = 1 # TODO: the multimodal prefill stage currently only supports a parallelism of 1; to be optimized
|
||||
else:
|
||||
self.max_prefill_batch = self.scheduler_config.max_num_seqs
|
||||
|
||||
@@ -691,8 +691,16 @@ class EngineService:
|
||||
else:
|
||||
max_num_batched_tokens = self.cfg.model_config.max_model_len
|
||||
|
||||
# In multimodal scenarios, pull requests based on available_block_num to avoid the frequent,
|
||||
# heavy rescheduling that occurs when there are not enough free blocks
|
||||
if self.cfg.model_config.enable_mm:
|
||||
self.resource_manager.check_and_free_block_tables()
|
||||
available_blocks = self.resource_manager.available_block_num()
|
||||
else:
|
||||
available_blocks = self.cfg.cache_config.max_block_num_per_seq
|
||||
|
||||
tasks = self.scheduler.get_requests(
|
||||
available_blocks=self.cfg.cache_config.max_block_num_per_seq,
|
||||
available_blocks=available_blocks,
|
||||
block_size=self.cfg.cache_config.block_size,
|
||||
reserved_output_blocks=self.cfg.cache_config.enc_dec_block_num,
|
||||
max_num_batched_tokens=max_num_batched_tokens,
|
||||
|
||||
Reference in New Issue
Block a user