[Bug fix] fix block num setting in scheduler v1 for develop (#3303)

* fix block num setting in scheduler v1

* fix block num setting in scheduler v1

* fix max_block_num and max_num_batched_tokens setting

* fix max_block_num and max_num_batched_tokens setting

* fix max_block_num and max_num_batched_tokens setting

* fix max_block_num and max_num_batched_tokens setting
This commit is contained in:
chenjian
2025-08-12 10:38:51 +08:00
committed by GitHub
parent 183e3863e8
commit b21272d9ff
4 changed files with 30 additions and 10 deletions

View File

@@ -64,7 +64,10 @@ class PrefixCacheManager:
         self.speculative_config = config.speculative_config
         self.local_data_parallel_id = local_data_parallel_id
-        self.num_gpu_blocks = self.cache_config.prefill_kvcache_block_num
+        if envs.ENABLE_V1_KVCACHE_SCHEDULER:
+            self.num_gpu_blocks = self.cache_config.total_block_num
+        else:
+            self.num_gpu_blocks = self.cache_config.prefill_kvcache_block_num
         self.num_cpu_blocks = self.cache_config.num_cpu_blocks
         self.gpu_free_block_list = list(range(self.num_gpu_blocks - 1, -1, -1))
         if self.num_cpu_blocks > 0: