[feat] add metrics for yiyan adapter (#3219)

* [feat] add metrics for yiyan adapter

* [fix] fix metrics num_requests_waiting and num_requests_running

* [fix] fix metrics gpu_cache_usage_perc

* [refactor] change where requests_number increases

* [chore] rename xxx_block_num as xxx_gpu_block_num, and update their values accordingly

* [chore] delete useless code
This commit is contained in:
李泳桦
2025-08-21 16:58:10 +08:00
committed by GitHub
parent 6854506533
commit d18a637a17
7 changed files with 181 additions and 18 deletions

View File

@@ -24,6 +24,7 @@ import zmq
from fastdeploy import envs
from fastdeploy.engine.request import CompletionOutput, Request, RequestOutput
from fastdeploy.inter_communicator import EngineWorkerQueue
from fastdeploy.metrics.metrics import main_process_metrics
from fastdeploy.utils import get_logger
@@ -158,6 +159,7 @@ class SplitwiseConnector:
except zmq.Again:
self.logger.warning(f"Send queue full for {addr}")
except Exception as e:
main_process_metrics.send_cache_failed_num.inc()
self.logger.error(f"Send to {addr} failed: {e}")
self._close_connection(addr)