mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Metrics] Update time_to_first_token to include tokenization & queue time, and remove redundant metrics (#4993)
* [update] update time_to_first_token to include queue time, and remove first_token_latency and infer_latency
* [doc] update docs
* [ci] fix test
* [chore] delete redundant code
---------
Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
This commit is contained in:
@@ -702,6 +702,7 @@ class EngineService:
|
||||
batch=num_prefill_batch,
|
||||
)
|
||||
for task in tasks:
|
||||
task.schedule_start_time = time.time()
|
||||
trace_print(LoggingEventName.REQUEST_QUEUE_END, task.request_id, getattr(task, "user", ""))
|
||||
|
||||
if self.cfg.scheduler_config.splitwise_role == "decode":
|
||||
@@ -814,6 +815,7 @@ class EngineService:
|
||||
break
|
||||
else:
|
||||
raise
|
||||
|
||||
# 2. Schedule requests
|
||||
tasks, error_tasks = self.resource_manager.schedule()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user