mirror of
				https://github.com/PaddlePaddle/FastDeploy.git
				synced 2025-11-01 04:12:58 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			83 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			83 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | |
| # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| """
 | |
| 
 | |
| """
 | |
| metrics
 | |
| """
 | |
| 
 | |
| from prometheus_client import Counter, Histogram
 | |
| 
 | |
| from fastdeploy.metrics.metrics import build_1_2_5_buckets
 | |
| 
 | |
| 
 | |
class WorkMetricsManager:
    """Prometheus Metrics Manager: owns all worker-process metric objects.

    Intended as a process-wide singleton; metric names may be registered
    with prometheus_client's default registry only once per process, so
    construction is guarded by a class-level flag.
    """

    # Class-level guard: True once any instance has registered the metrics.
    _initialized = False

    def __init__(self):
        """Create and register the Prometheus metrics, at most once.

        If the metrics were already registered (by this or any earlier
        instance), return immediately. Without this guard a second
        construction would re-register identical metric names and
        prometheus_client would raise a "Duplicated timeseries" ValueError.
        """
        if WorkMetricsManager._initialized:
            return

        # End-to-end latency histogram: fine-grained buckets below 5s for
        # interactive requests, then progressively coarser up to ~2 hours
        # to cover very long generations.
        self.e2e_request_latency = Histogram(
            "fastdeploy:e2e_request_latency_seconds",
            "End-to-end request latency (from request arrival to final response)",
            buckets=[
                0.3,
                0.5,
                0.8,
                1.0,
                1.5,
                2.0,
                2.5,
                5.0,
                10.0,
                15.0,
                20.0,
                30.0,
                40.0,
                50.0,
                60.0,
                120.0,
                240.0,
                480.0,
                960.0,
                1920.0,
                7680.0,
            ],
        )
        # Distribution of the max_tokens request parameter; 33792 caps the
        # 1-2-5 bucket series at the largest supported generation length.
        self.request_params_max_tokens = Histogram(
            name="fastdeploy:request_params_max_tokens",
            documentation="Histogram of max_tokens parameter in request parameters",
            buckets=build_1_2_5_buckets(33792),
        )
        # Monotonic count of all prompt (prefill) tokens processed.
        self.prompt_tokens_total = Counter(
            name="fastdeploy:prompt_tokens_total",
            documentation="Total number of prompt tokens processed",
        )
        # Per-request prompt-token count distribution, same bucket scheme
        # as request_params_max_tokens for easy comparison.
        self.request_prompt_tokens = Histogram(
            name="fastdeploy:request_prompt_tokens",
            documentation="Number of prefill tokens processed.",
            buckets=build_1_2_5_buckets(33792),
        )

        # BUG FIX: assign on the CLASS, not the instance. The original
        # `self._initialized = True` only created an instance attribute,
        # so the class flag stayed False and a second instantiation would
        # re-register every metric (prometheus_client raises ValueError).
        WorkMetricsManager._initialized = True
| 
 | |
| 
 | |
# Module-level singleton: importing this module registers the worker-process
# metrics once and exposes them for use throughout the process.
work_process_metrics = WorkMetricsManager()
 | 
