Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-12-24 13:28:13 +08:00)
[Feature] Add timestamp for profiler (#4726)
* [Feature] Add timestamp for profiler
* fix bug for offline inference
* fix for ci
* fix
* fix ci
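The diff below touches the engine entry points, the RequestMetrics dataclass, and the token processor. Three wall-clock timestamps are introduced: llm_engine_recv_req_timestamp (stamped when the engine receives a request), llm_engine_send_req_to_engine_timestamp (taken from the task's inference_start_time), and llm_engine_recv_token_timestamp (stamped when the token processor handles output). As a minimal sketch of how a consumer might turn the serialized metrics into a latency breakdown (the helper name and the derived keys are illustrative assumptions, not part of this commit):

# Hypothetical helper, not part of this commit: derive per-stage latencies
# from the dict produced by RequestMetrics.to_dict().
def latency_breakdown(metrics: dict) -> dict:
    recv = metrics.get("llm_engine_recv_req_timestamp")
    sent = metrics.get("llm_engine_send_req_to_engine_timestamp")
    token = metrics.get("llm_engine_recv_token_timestamp")
    if None in (recv, sent, token):
        return {}  # the fields are Optional and may be unset
    return {
        "queue_and_preprocess_s": sent - recv,  # receive -> handed to engine
        "engine_to_token_s": token - sent,      # engine start -> token observed
        "total_s": token - recv,
    }

# Example with made-up epoch timestamps:
print(latency_breakdown({
    "llm_engine_recv_req_timestamp": 1000.00,
    "llm_engine_send_req_to_engine_timestamp": 1000.25,
    "llm_engine_recv_token_timestamp": 1000.90,
}))  # roughly {'queue_and_preprocess_s': 0.25, 'engine_to_token_s': 0.65, 'total_s': 0.90}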
@@ -402,6 +402,7 @@ class AsyncLLMEngine:
         try:
             request = Request.from_dict(prompt)
+            request.llm_engine_recv_req_timestamp = time.time()

             # Check if already preprocessed by AsyncEngineClient
             is_preprocessed = prompt.get("_preprocessed", False)
@@ -839,6 +839,7 @@ class EngineService:
         err_msg = None
         try:
             request = Request.from_dict(data)
+            request.llm_engine_recv_req_timestamp = time.time()
             start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
             main_process_metrics.requests_number.inc()
             self.llm_logger.debug(f"Receive request: {request}")
@@ -249,6 +249,7 @@ class LLMEngine:
             if sampling_params is not None:
                 task.update(asdict(sampling_params))
             request = Request.from_dict(task)
+            request.llm_engine_recv_req_timestamp = time.time()
             llm_logger.info(f"Receive request {request}")
             if sampling_params is not None:
                 request.sampling_params = sampling_params
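All three entry points (async engine, engine service, offline LLMEngine) follow the same pattern: build the Request, then immediately stamp the wall-clock receive time on it. A minimal, self-contained sketch of that pattern (FakeRequest and receive are hypothetical stand-ins, not FastDeploy APIs):

import time
from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeRequest:
    # Stand-in for fastdeploy's Request; only the new field is modeled here.
    prompt: str
    llm_engine_recv_req_timestamp: Optional[float] = None

def receive(payload: dict) -> FakeRequest:
    request = FakeRequest(prompt=payload["prompt"])
    # Stamp the receive time right after the request object is constructed,
    # mirroring the hunks above, so later queueing is included in the delta.
    request.llm_engine_recv_req_timestamp = time.time()
    return request

req = receive({"prompt": "hello"})
assert req.llm_engine_recv_req_timestamp is not None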
@@ -399,6 +399,9 @@ class RequestMetrics:
     model_forward_time: Optional[float] = None
     model_execute_time: Optional[float] = None
     request_start_time: Optional[float] = None
+    llm_engine_recv_req_timestamp: Optional[float] = None
+    llm_engine_send_req_to_engine_timestamp: Optional[float] = None
+    llm_engine_recv_token_timestamp: Optional[float] = None

     def to_dict(self):
         """
@@ -413,6 +416,9 @@ class RequestMetrics:
             "model_forward_time": self.model_forward_time,
             "model_execute_time": self.model_execute_time,
             "request_start_time": self.request_start_time,
+            "llm_engine_recv_req_timestamp": self.llm_engine_recv_req_timestamp,
+            "llm_engine_send_req_to_engine_timestamp": self.llm_engine_send_req_to_engine_timestamp,
+            "llm_engine_recv_token_timestamp": self.llm_engine_recv_token_timestamp,
         }

     @classmethod
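The three new fields are plain Optional[float] wall-clock timestamps defaulting to None, and to_dict exposes them alongside the existing metrics so unset values serialize as None. A trimmed-down sketch of that data-structure side (RequestMetricsSketch is a reduced stand-in, not the real class):

from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class RequestMetricsSketch:
    # Reduced stand-in for RequestMetrics; only the new timestamp fields are kept.
    llm_engine_recv_req_timestamp: Optional[float] = None
    llm_engine_send_req_to_engine_timestamp: Optional[float] = None
    llm_engine_recv_token_timestamp: Optional[float] = None

    def to_dict(self) -> dict:
        # Same behavior as the real to_dict for these keys: unset fields stay None.
        return asdict(self)

print(RequestMetricsSketch(llm_engine_recv_req_timestamp=1700000000.0).to_dict())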
@@ -630,8 +630,10 @@ class TokenProcessor:
                     time_in_queue=task.schedule_start_time - task.preprocess_end_time,
                     preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
                     request_start_time=task.arrival_time,
+                    llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
+                    llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
+                    llm_engine_recv_token_timestamp=time.time(),
                 )

                 self._record_first_token_metrics(task, current_time)
             else:
@@ -639,6 +641,9 @@ class TokenProcessor:
                     arrival_time=time.time(),
                     request_start_time=task.arrival_time,
                     model_execute_time=time.time() - task.inference_start_time,
+                    llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
+                    llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
+                    llm_engine_recv_token_timestamp=time.time(),
                 )
                 self.number_of_output_tokens += len(token_ids)
                 self._record_metrics(task, current_time, token_ids)
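In both TokenProcessor branches the receive timestamp is copied from the task, the send-to-engine timestamp reuses the task's existing inference_start_time, and the token timestamp is taken with time.time() at processing time. A hedged sketch of how a downstream consumer might surface those values (the function, logger name, and log format are assumptions for illustration, not part of this commit):

import json
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("profiler_sketch")

def log_request_timeline(request_id: str, metrics: dict) -> None:
    # Hypothetical consumer: emit the three timestamps as one structured log line.
    # Because the send timestamp reuses inference_start_time, (send - recv) covers
    # preprocessing plus queueing and (token - send) covers execution plus streaming.
    keys = (
        "llm_engine_recv_req_timestamp",
        "llm_engine_send_req_to_engine_timestamp",
        "llm_engine_recv_token_timestamp",
    )
    logger.info(json.dumps({"request_id": request_id, **{k: metrics.get(k) for k in keys}}))

log_request_timeline("req-0", {
    "llm_engine_recv_req_timestamp": 1700000000.0,
    "llm_engine_send_req_to_engine_timestamp": 1700000000.2,
    "llm_engine_recv_token_timestamp": 1700000000.9,
})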
@@ -68,6 +68,7 @@ class MockTask:
         self.prefill_chunk_info = None
         self.prefill_chunk_num = 0
         self.pooling_params = None
+        self.llm_engine_recv_req_timestamp = time.time()

     def get(self, key: str, default_value=None):
         if hasattr(self, key):
@@ -46,6 +46,7 @@ class MockTask:
         self.disaggregate_info = None
         self.prefill_chunk_info = None
         self.prefill_chunk_num = 0
+        self.llm_engine_recv_req_timestamp = time.time()

     def get(self, key: str, default_value=None):
         if hasattr(self, key):
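Both MockTask test fixtures gain the same attribute so tests that build RequestMetrics from a mock task keep working. A sketch of the kind of ordering check such a fixture enables (this MockTask is a local toy and the test is an example, not one from this commit):

import time

class MockTask:
    # Toy version of the test fixture: only the timestamp attributes are modeled.
    def __init__(self):
        self.llm_engine_recv_req_timestamp = time.time()
        self.inference_start_time = time.time()

def test_timestamps_are_ordered():
    task = MockTask()
    recv_token_timestamp = time.time()
    # The receive stamp must not be later than the engine start or the token stamp.
    assert task.llm_engine_recv_req_timestamp <= task.inference_start_time
    assert task.inference_start_time <= recv_token_timestamp

test_timestamps_are_ordered()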