mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-17 14:11:14 +08:00
Fix block num in scheduler v1 for release 2.1 (#3315)
* fix bug for scheduler v0
* fix block num setting in scheduler v1 for release 2.1
* fix block num setting in scheduler v1 for release 2.1
---------
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Co-authored-by: YUNSHEN XIE <1084314248@qq.com>
This commit is contained in:
@@ -64,7 +64,10 @@ class PrefixCacheManager:
|
||||
self.speculative_config = config.speculative_config
|
||||
self.local_data_parallel_id = local_data_parallel_id
|
||||
|
||||
self.num_gpu_blocks = self.cache_config.prefill_kvcache_block_num
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
self.num_gpu_blocks = self.cache_config.total_block_num
|
||||
else:
|
||||
self.num_gpu_blocks = self.cache_config.prefill_kvcache_block_num
|
||||
self.num_cpu_blocks = self.cache_config.num_cpu_blocks
|
||||
self.gpu_free_block_list = list(range(self.num_gpu_blocks - 1, -1, -1))
|
||||
if self.num_cpu_blocks > 0:
|
||||
|
Reference in New Issue
Block a user