Sync v2.0 version of code to GitHub repo

This commit is contained in:
Jiang-Jia-Jun
2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions

View File

@@ -22,7 +22,9 @@ import atexit
import shutil
from threading import Lock
from prometheus_client import Histogram
from prometheus_client import Histogram, Counter
from fastdeploy.metrics.metrics import build_1_2_5_buckets
class WorkMetricsManager(object):
@@ -44,6 +46,20 @@ class WorkMetricsManager(object):
40.0, 50.0, 60.0, 120.0, 240.0, 480.0, 960.0, 1920.0, 7680.0
]
)
self.request_params_max_tokens = Histogram(
name='fastdeploy:request_params_max_tokens',
documentation='Histogram of max_tokens parameter in request parameters',
buckets=build_1_2_5_buckets(33792)
)
self.prompt_tokens_total = Counter(
name="fastdeploy:prompt_tokens_total",
documentation="Total number of prompt tokens processed",
)
self.request_prompt_tokens = Histogram(
name='fastdeploy:request_prompt_tokens',
documentation='Number of prefill tokens processed.',
buckets=build_1_2_5_buckets(33792)
)
self._initialized = True