[FDConfig] add block number verified (#4983)

* Update config.py

* fix

* update unit test

---------

Co-authored-by: ltd0924 <luotingdan@baidu.com>
This commit is contained in:
ltd0924
2025-11-13 09:48:44 +08:00
committed by GitHub
parent 1c0b0b08b7
commit 303c986cc7
4 changed files with 30 additions and 17 deletions

View File

@@ -1290,6 +1290,9 @@ class CacheConfig:
self.prefill_kvcache_block_num = self.total_block_num
else:
self.prefill_kvcache_block_num = int(self.total_block_num * self.kv_cache_ratio)
assert (
self.prefill_kvcache_block_num >= self.max_block_num_per_seq
), f"current block number :{self.prefill_kvcache_block_num} should be greater than or equal to current model len needed minimum block number :{self.max_block_num_per_seq}"
else:
length = num_total_tokens // number_of_tasks
block_num = (length + self.block_size - 1 + self.dec_token_num) // self.block_size
@@ -1310,6 +1313,9 @@ class CacheConfig:
f"Reset block num, the total_block_num:{self.total_block_num},"
f" prefill_kvcache_block_num:{self.prefill_kvcache_block_num}"
)
assert (
self.prefill_kvcache_block_num >= self.max_block_num_per_seq
), f"current block number :{self.prefill_kvcache_block_num} should be greater than or equal to current model len needed minimum block number :{self.max_block_num_per_seq}"
def print(self):
"""
@@ -1585,8 +1591,8 @@ class FDConfig:
if self.long_prefill_token_threshold == 0:
self.long_prefill_token_threshold = int(self.model_config.max_model_len * 0.04)
self.cache_config.postprocess(self.scheduler_config.max_num_batched_tokens, self.scheduler_config.max_num_seqs)
self.cache_config.max_block_num_per_seq = int(self.model_config.max_model_len // self.cache_config.block_size)
self.cache_config.postprocess(self.scheduler_config.max_num_batched_tokens, self.scheduler_config.max_num_seqs)
if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER:
self.cache_config.enable_prefix_caching = False