From 0358329946ff3f24ac8d2ce52d3503771e99d2b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B3=B3=E6=A1=A6?= <39643373+liyonghua0910@users.noreply.github.com> Date: Mon, 22 Sep 2025 18:56:00 +0800 Subject: [PATCH] [fix] initialize available_gpu_block_num with max_gpu_block_num (#4193) --- fastdeploy/cache_manager/prefix_cache_manager.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py index 14c7bcb00..5c3c4a63b 100644 --- a/fastdeploy/cache_manager/prefix_cache_manager.py +++ b/fastdeploy/cache_manager/prefix_cache_manager.py @@ -113,6 +113,10 @@ class PrefixCacheManager: + f"{self.num_cpu_blocks}, bytes_per_layer_per_block {self.cache_config.bytes_per_layer_per_block}" ) + main_process_metrics.max_gpu_block_num.set(self.num_gpu_blocks) + main_process_metrics.available_gpu_block_num.set(self.num_gpu_blocks) + main_process_metrics.available_gpu_resource.set(1.0) + @property def available_gpu_resource(self): return len(self.gpu_free_block_list) / self.num_gpu_blocks if self.num_gpu_blocks > 0 else 0.0 @@ -269,6 +273,7 @@ class PrefixCacheManager: self.node_id_pool = list(range(self.num_gpu_blocks + self.num_cpu_blocks)) main_process_metrics.max_gpu_block_num.set(self.num_gpu_blocks) + main_process_metrics.available_gpu_block_num.set(self.num_gpu_blocks) main_process_metrics.available_gpu_resource.set(1.0) def can_allocate_gpu_blocks(self, num_blocks: int):