mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[metrics] Add several observability metrics (#3868)
* Add several observability metrics * [wenxin-tools-584] 【可观测性】支持查看本节点的并发数、剩余block_size、排队请求数等信息 * adjust some metrics and md files * trigger ci * adjust ci file * trigger ci * trigger ci --------- Co-authored-by: K11OntheBoat <your_email@example.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.utils import get_logger
|
||||
|
||||
logger = get_logger("prefix_cache_manager", "prefix_cache_manager.log")
|
||||
@@ -54,6 +55,11 @@ class CacheMetrics:
|
||||
self.cpu_hit_token_ratio = self.total_cpu_matched_token_num / self.total_token_num
|
||||
self.gpu_hit_token_ratio = self.total_gpu_matched_token_num / self.total_token_num
|
||||
|
||||
main_process_metrics.hit_req_rate.set(self.hit_req_ratio)
|
||||
main_process_metrics.hit_token_rate.set(self.hit_token_ratio)
|
||||
main_process_metrics.cpu_hit_token_rate.set(self.cpu_hit_token_ratio)
|
||||
main_process_metrics.gpu_hit_token_rate.set(self.gpu_hit_token_ratio)
|
||||
|
||||
logger.info(
|
||||
f"Metrics for all requests: req_count {self.req_count} hit_req_count {self.hit_req_count}"
|
||||
+ f" hit_req_ratio {self.hit_req_ratio:.2f} hit_token_ratio {self.hit_token_ratio:.2f}"
|
||||
|
Reference in New Issue
Block a user