From cc8f5312f5a39aa9a801b561a92b684849e92dd0 Mon Sep 17 00:00:00 2001 From: chenjian <1435317881@qq.com> Date: Wed, 5 Nov 2025 12:04:59 +0800 Subject: [PATCH] [Feature] Add timestamp for profiler (#4726) * [Feature] Add timestamp for profiler * fix bug for offline inference * fix for ci * fix * fix ci --- fastdeploy/engine/async_llm.py | 1 + fastdeploy/engine/common_engine.py | 1 + fastdeploy/engine/engine.py | 1 + fastdeploy/engine/request.py | 6 ++++++ fastdeploy/output/token_processor.py | 7 ++++++- tests/output/test_get_save_output_v1.py | 1 + tests/output/test_process_batch_output.py | 1 + 7 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fastdeploy/engine/async_llm.py b/fastdeploy/engine/async_llm.py index 249fe2d94..11f809b99 100644 --- a/fastdeploy/engine/async_llm.py +++ b/fastdeploy/engine/async_llm.py @@ -402,6 +402,7 @@ class AsyncLLMEngine: try: request = Request.from_dict(prompt) + request.llm_engine_recv_req_timestamp = time.time() # Check if already preprocessed by AsyncEngineClient is_preprocessed = prompt.get("_preprocessed", False) diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index c346d1c75..bc5f86853 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -839,6 +839,7 @@ class EngineService: err_msg = None try: request = Request.from_dict(data) + request.llm_engine_recv_req_timestamp = time.time() start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER) main_process_metrics.requests_number.inc() self.llm_logger.debug(f"Receive request: {request}") diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py index d36591774..fb57f2abe 100644 --- a/fastdeploy/engine/engine.py +++ b/fastdeploy/engine/engine.py @@ -249,6 +249,7 @@ class LLMEngine: if sampling_params is not None: task.update(asdict(sampling_params)) request = Request.from_dict(task) + request.llm_engine_recv_req_timestamp = time.time() llm_logger.info(f"Receive request {request}") if 
sampling_params is not None: request.sampling_params = sampling_params diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py index 3036f530f..d09066855 100644 --- a/fastdeploy/engine/request.py +++ b/fastdeploy/engine/request.py @@ -399,6 +399,9 @@ class RequestMetrics: model_forward_time: Optional[float] = None model_execute_time: Optional[float] = None request_start_time: Optional[float] = None + llm_engine_recv_req_timestamp: Optional[float] = None + llm_engine_send_req_to_engine_timestamp: Optional[float] = None + llm_engine_recv_token_timestamp: Optional[float] = None def to_dict(self): """ @@ -413,6 +416,9 @@ class RequestMetrics: "model_forward_time": self.model_forward_time, "model_execute_time": self.model_execute_time, "request_start_time": self.request_start_time, + "llm_engine_recv_req_timestamp": self.llm_engine_recv_req_timestamp, + "llm_engine_send_req_to_engine_timestamp": self.llm_engine_send_req_to_engine_timestamp, + "llm_engine_recv_token_timestamp": self.llm_engine_recv_token_timestamp, } @classmethod diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py index 5d94a2c2b..681f1b0d0 100644 --- a/fastdeploy/output/token_processor.py +++ b/fastdeploy/output/token_processor.py @@ -630,8 +630,10 @@ class TokenProcessor: time_in_queue=task.schedule_start_time - task.preprocess_end_time, preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time, request_start_time=task.arrival_time, + llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp, + llm_engine_send_req_to_engine_timestamp=task.inference_start_time, + llm_engine_recv_token_timestamp=time.time(), ) - self._record_first_token_metrics(task, current_time) else: @@ -639,6 +641,9 @@ class TokenProcessor: arrival_time=time.time(), request_start_time=task.arrival_time, model_execute_time=time.time() - task.inference_start_time, + llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp, + 
llm_engine_send_req_to_engine_timestamp=task.inference_start_time, + llm_engine_recv_token_timestamp=time.time(), ) self.number_of_output_tokens += len(token_ids) self._record_metrics(task, current_time, token_ids) diff --git a/tests/output/test_get_save_output_v1.py b/tests/output/test_get_save_output_v1.py index 056ee71fa..93971f688 100644 --- a/tests/output/test_get_save_output_v1.py +++ b/tests/output/test_get_save_output_v1.py @@ -68,6 +68,7 @@ class MockTask: self.prefill_chunk_info = None self.prefill_chunk_num = 0 self.pooling_params = None + self.llm_engine_recv_req_timestamp = time.time() def get(self, key: str, default_value=None): if hasattr(self, key): diff --git a/tests/output/test_process_batch_output.py b/tests/output/test_process_batch_output.py index dd5d4ed9d..a205aa752 100644 --- a/tests/output/test_process_batch_output.py +++ b/tests/output/test_process_batch_output.py @@ -46,6 +46,7 @@ class MockTask: self.disaggregate_info = None self.prefill_chunk_info = None self.prefill_chunk_num = 0 + self.llm_engine_recv_req_timestamp = time.time() def get(self, key: str, default_value=None): if hasattr(self, key):