[feat] add metrics for yiyan adapter (#3219)

* [feat] add metrics for yiyan adapter

* [fix] fix metrics num_requests_waiting and num_requests_running

* [fix] fix metrics gpu_cache_usage_perc

* [refactor] change where requests_number is incremented

* [chore] rename xxx_block_num to xxx_gpu_block_num and update their values accordingly

* [chore] delete useless code
This commit is contained in:
李泳桦
2025-08-21 16:58:10 +08:00
committed by GitHub
parent 6854506533
commit d18a637a17
7 changed files with 181 additions and 18 deletions

View File

@@ -439,6 +439,8 @@ class LLMEngine:
get_request_pool.submit(_fetch_request)
# 2. Schedule requests
tasks = self.resource_manager.schedule()
main_process_metrics.num_requests_waiting.dec(len(tasks))
main_process_metrics.num_requests_running.inc(len(tasks))
# 3. Send to engine
if tasks:
self.resource_manager.get_real_bsz()
@@ -476,6 +478,7 @@ class LLMEngine:
request = Request.from_dict(data)
start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
main_process_metrics.requests_number.inc()
llm_logger.debug(f"Receive request: {request}")
err_msg = None