mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[BugFix] Fix inference_start_time (#4922)
* fix inference_start_time
* fix inference_start_time
This commit is contained in:
@@ -196,6 +196,7 @@ class OpenAIServingChat:
         num_cached_tokens = 0
         num_image_tokens = [0] * num_choices
         tool_called = [False] * num_choices
+        inference_start_time = [0] * num_choices
         max_streaming_response_tokens = (
             request.max_streaming_response_tokens
             if request.max_streaming_response_tokens is not None
@@ -272,9 +273,9 @@ class OpenAIServingChat:

                 if res["metrics"]["first_token_time"] is not None:
                     arrival_time = res["metrics"]["first_token_time"]
-                    inference_start_time = res["metrics"]["inference_start_time"]
+                    inference_start_time[idx] = res["metrics"]["inference_start_time"]
                 else:
-                    arrival_time = res["metrics"]["arrival_time"] - inference_start_time
+                    arrival_time = res["metrics"]["arrival_time"] - inference_start_time[idx]
                 if first_iteration:
                     num_prompt_tokens = len(prompt_token_ids)
                     num_cached_tokens = res.get("num_cached_tokens", 0)
|
||||
Reference in New Issue
Block a user