[Feature] Add timestamp for profiler (#4726)

* [Feature] Add timestamp for profiler

* fix bug for offline inference

* fix for ci

* fix

* fix ci
Commit cc8f5312f5 by chenjian, 2025-11-05 12:04:59 +08:00 (committed by GitHub); parent 876e4a8935.
7 changed files with 17 additions and 1 deletion

@@ -402,6 +402,7 @@ class AsyncLLMEngine:
         try:
             request = Request.from_dict(prompt)
+            request.llm_engine_recv_req_timestamp = time.time()
             # Check if already preprocessed by AsyncEngineClient
             is_preprocessed = prompt.get("_preprocessed", False)
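This hunk and the two that follow stamp the same wall-clock receive time at each ingestion path (async engine, ZMQ engine service, offline engine). A minimal sketch of the pattern, using a hypothetical stripped-down Request (the real class carries many more fields) and a hypothetical receive() helper:

import time
from dataclasses import dataclass
from typing import Optional

@dataclass
class Request:
    prompt: str
    # Stamped the moment the raw dict is deserialized by the engine.
    llm_engine_recv_req_timestamp: Optional[float] = None

    @classmethod
    def from_dict(cls, d: dict) -> "Request":
        return cls(prompt=d["prompt"])

def receive(prompt: dict) -> Request:  # hypothetical helper
    request = Request.from_dict(prompt)
    # time.time() is epoch seconds, so timestamps taken in different
    # components on the same host can be subtracted directly.
    request.llm_engine_recv_req_timestamp = time.time()
    return request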

@@ -839,6 +839,7 @@ class EngineService:
         err_msg = None
         try:
             request = Request.from_dict(data)
+            request.llm_engine_recv_req_timestamp = time.time()
             start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
             main_process_metrics.requests_number.inc()
             self.llm_logger.debug(f"Receive request: {request}")

@@ -249,6 +249,7 @@ class LLMEngine:
         if sampling_params is not None:
             task.update(asdict(sampling_params))
         request = Request.from_dict(task)
+        request.llm_engine_recv_req_timestamp = time.time()
         llm_logger.info(f"Receive request {request}")
         if sampling_params is not None:
             request.sampling_params = sampling_params

@@ -399,6 +399,9 @@ class RequestMetrics:
     model_forward_time: Optional[float] = None
     model_execute_time: Optional[float] = None
     request_start_time: Optional[float] = None
+    llm_engine_recv_req_timestamp: Optional[float] = None
+    llm_engine_send_req_to_engine_timestamp: Optional[float] = None
+    llm_engine_recv_token_timestamp: Optional[float] = None

     def to_dict(self):
         """
@@ -413,6 +416,9 @@
             "model_forward_time": self.model_forward_time,
             "model_execute_time": self.model_execute_time,
             "request_start_time": self.request_start_time,
+            "llm_engine_recv_req_timestamp": self.llm_engine_recv_req_timestamp,
+            "llm_engine_send_req_to_engine_timestamp": self.llm_engine_send_req_to_engine_timestamp,
+            "llm_engine_recv_token_timestamp": self.llm_engine_recv_token_timestamp,
         }

     @classmethod
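The three new fields default to None, so call sites that never set them still serialize cleanly. A minimal sketch of the dataclass round-trip, assuming only what the hunks above show:

import time
from dataclasses import dataclass
from typing import Optional

@dataclass
class RequestMetrics:
    request_start_time: Optional[float] = None
    llm_engine_recv_req_timestamp: Optional[float] = None
    llm_engine_send_req_to_engine_timestamp: Optional[float] = None
    llm_engine_recv_token_timestamp: Optional[float] = None

    def to_dict(self):
        return {
            "request_start_time": self.request_start_time,
            "llm_engine_recv_req_timestamp": self.llm_engine_recv_req_timestamp,
            "llm_engine_send_req_to_engine_timestamp": self.llm_engine_send_req_to_engine_timestamp,
            "llm_engine_recv_token_timestamp": self.llm_engine_recv_token_timestamp,
        }

metrics = RequestMetrics(llm_engine_recv_req_timestamp=time.time())
# Unset timestamps serialize as None rather than raising.
assert metrics.to_dict()["llm_engine_send_req_to_engine_timestamp"] is None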

@@ -630,8 +630,10 @@ class TokenProcessor:
                 time_in_queue=task.schedule_start_time - task.preprocess_end_time,
                 preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
                 request_start_time=task.arrival_time,
+                llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
+                llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
+                llm_engine_recv_token_timestamp=time.time(),
             )
             self._record_first_token_metrics(task, current_time)
         else:
@@ -639,6 +641,9 @@
                 arrival_time=time.time(),
                 request_start_time=task.arrival_time,
                 model_execute_time=time.time() - task.inference_start_time,
+                llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
+                llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
+                llm_engine_recv_token_timestamp=time.time(),
             )
         self.number_of_output_tokens += len(token_ids)
         self._record_metrics(task, current_time, token_ids)
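Once TokenProcessor fills all three timestamps, a profiler can recover per-stage latencies by subtraction. A sketch under that assumption; the derived interval names are illustrative, not from the codebase:

def profile_intervals(m: dict) -> dict:
    """Derive per-stage durations (seconds) from a RequestMetrics dict.

    Assumes all three timestamps came from the same host clock.
    """
    recv = m["llm_engine_recv_req_timestamp"]
    sent = m["llm_engine_send_req_to_engine_timestamp"]
    token = m["llm_engine_recv_token_timestamp"]
    return {
        # Queueing plus preprocessing before inference starts.
        "pre_engine_seconds": sent - recv,
        # From inference start until this token was received.
        "engine_to_token_seconds": token - sent,
        # End-to-end as seen by the engine.
        "total_seconds": token - recv,
    }

m = {
    "llm_engine_recv_req_timestamp": 100.0,
    "llm_engine_send_req_to_engine_timestamp": 100.2,
    "llm_engine_recv_token_timestamp": 100.9,
}
print(profile_intervals(m))  # pre_engine ≈ 0.2, engine_to_token ≈ 0.7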

@@ -68,6 +68,7 @@ class MockTask:
         self.prefill_chunk_info = None
         self.prefill_chunk_num = 0
         self.pooling_params = None
+        self.llm_engine_recv_req_timestamp = time.time()

     def get(self, key: str, default_value=None):
         if hasattr(self, key):

@@ -46,6 +46,7 @@ class MockTask:
         self.disaggregate_info = None
         self.prefill_chunk_info = None
         self.prefill_chunk_num = 0
+        self.llm_engine_recv_req_timestamp = time.time()

     def get(self, key: str, default_value=None):
         if hasattr(self, key):
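Both test mocks (this hunk and the previous one) stamp the field in __init__ so the code under test can read it unconditionally. A minimal runnable sketch of the fixture pattern, assuming only what the hunks show:

import time

class MockTask:
    def __init__(self):
        self.inference_start_time = time.time()
        # Mirrors the real Request: set once when the task is created.
        self.llm_engine_recv_req_timestamp = time.time()

    def get(self, key: str, default_value=None):
        # Attribute-backed lookup with a dict-like interface,
        # equivalent to the hasattr check in the real mock.
        return getattr(self, key, default_value)

task = MockTask()
assert task.get("llm_engine_recv_req_timestamp") is not None
assert task.get("missing_field", 0) == 0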