mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[FDConfig] add block number verification (#4983)
* Update config.py
* fix
* update unit test
---------
Co-authored-by: ltd0924 <luotingdan@baidu.com>
This commit is contained in:
@@ -1290,6 +1290,9 @@ class CacheConfig:
|
||||
self.prefill_kvcache_block_num = self.total_block_num
|
||||
else:
|
||||
self.prefill_kvcache_block_num = int(self.total_block_num * self.kv_cache_ratio)
|
||||
assert (
|
||||
self.prefill_kvcache_block_num >= self.max_block_num_per_seq
|
||||
), f"current block number :{self.prefill_kvcache_block_num} should be greater than or equal to current model len needed minimum block number :{self.max_block_num_per_seq}"
|
||||
else:
|
||||
length = num_total_tokens // number_of_tasks
|
||||
block_num = (length + self.block_size - 1 + self.dec_token_num) // self.block_size
|
||||
@@ -1310,6 +1313,9 @@ class CacheConfig:
|
||||
f"Reset block num, the total_block_num:{self.total_block_num},"
|
||||
f" prefill_kvcache_block_num:{self.prefill_kvcache_block_num}"
|
||||
)
|
||||
assert (
|
||||
self.prefill_kvcache_block_num >= self.max_block_num_per_seq
|
||||
), f"current block number :{self.prefill_kvcache_block_num} should be greater than or equal to current model len needed minimum block number :{self.max_block_num_per_seq}"
|
||||
|
||||
def print(self):
|
||||
"""
|
||||
@@ -1585,8 +1591,8 @@ class FDConfig:
|
||||
if self.long_prefill_token_threshold == 0:
|
||||
self.long_prefill_token_threshold = int(self.model_config.max_model_len * 0.04)
|
||||
|
||||
self.cache_config.postprocess(self.scheduler_config.max_num_batched_tokens, self.scheduler_config.max_num_seqs)
|
||||
self.cache_config.max_block_num_per_seq = int(self.model_config.max_model_len // self.cache_config.block_size)
|
||||
self.cache_config.postprocess(self.scheduler_config.max_num_batched_tokens, self.scheduler_config.max_num_seqs)
|
||||
if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
self.cache_config.enable_prefix_caching = False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user