mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[feat] add metrics for yiyan adapter (#3219)
* [feat] add metrics for yiyan adapter * [fix] fix metrics num_requests_waiting and num_requests_running * [fix] fix metrics gpu_cache_usage_perc * [refactor] change where requests_number increases * [chore] rename xxx_block_num as xxx_gpu_block_num, and update their values accordingly * [chore] delete useless code
This commit is contained in:
@@ -31,6 +31,7 @@ from fastdeploy import envs
|
||||
from fastdeploy.cache_manager.cache_data import BlockNode, CacheStatus
|
||||
from fastdeploy.cache_manager.cache_metrics import CacheMetrics
|
||||
from fastdeploy.inter_communicator import EngineCacheQueue, IPCSignal
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.utils import get_logger
|
||||
|
||||
logger = get_logger("prefix_cache_manager", "prefix_cache_manager.log")
|
||||
@@ -106,6 +107,10 @@ class PrefixCacheManager:
|
||||
+ f"{self.num_cpu_blocks}, bytes_per_layer_per_block {self.cache_config.bytes_per_layer_per_block}"
|
||||
)
|
||||
|
||||
@property
|
||||
def available_gpu_resource(self):
|
||||
return len(self.gpu_free_block_list) / self.num_gpu_blocks if self.num_gpu_blocks > 0 else 0.0
|
||||
|
||||
def launch_cache_manager(
|
||||
self,
|
||||
cache_config,
|
||||
@@ -289,6 +294,9 @@ class PrefixCacheManager:
|
||||
heapq.heapify(self.gpu_free_block_list)
|
||||
self.node_id_pool = list(range(self.num_gpu_blocks + self.num_cpu_blocks))
|
||||
|
||||
main_process_metrics.max_gpu_block_num.set(self.num_gpu_blocks)
|
||||
main_process_metrics.available_gpu_resource.set(1.0)
|
||||
|
||||
def _enable_cpu_cache(self):
|
||||
"""
|
||||
_enable_cpu_cache function used to enable cpu cache.
|
||||
@@ -324,6 +332,8 @@ class PrefixCacheManager:
|
||||
logger.info(
|
||||
f"allocate_gpu_blocks: {allocated_block_ids}, len(self.gpu_free_block_list) {len(self.gpu_free_block_list)}"
|
||||
)
|
||||
main_process_metrics.free_gpu_block_num.set(len(self.gpu_free_block_list))
|
||||
main_process_metrics.available_gpu_resource.set(self.available_gpu_resource)
|
||||
return allocated_block_ids
|
||||
|
||||
def recycle_gpu_blocks(self, gpu_block_ids):
|
||||
@@ -338,6 +348,8 @@ class PrefixCacheManager:
|
||||
heapq.heappush(self.gpu_free_block_list, gpu_block_id)
|
||||
else:
|
||||
heapq.heappush(self.gpu_free_block_list, gpu_block_ids)
|
||||
main_process_metrics.free_gpu_block_num.set(len(self.gpu_free_block_list))
|
||||
main_process_metrics.available_gpu_resource.set(self.available_gpu_resource)
|
||||
|
||||
def allocate_cpu_blocks(self, num_blocks):
|
||||
"""
|
||||
|
Reference in New Issue
Block a user