mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
update benchmark scripts (#4497)
This commit is contained in:
@@ -112,6 +112,7 @@ async def async_request_eb_openai_chat_completions(
|
|||||||
output = RequestFuncOutput()
|
output = RequestFuncOutput()
|
||||||
output.prompt_len = 0
|
output.prompt_len = 0
|
||||||
output.no = request_func_input.no
|
output.no = request_func_input.no
|
||||||
|
request_id = "None"
|
||||||
|
|
||||||
ttft = 0.0
|
ttft = 0.0
|
||||||
st = time.perf_counter()
|
st = time.perf_counter()
|
||||||
@@ -131,6 +132,9 @@ async def async_request_eb_openai_chat_completions(
|
|||||||
timestamp = time.perf_counter()
|
timestamp = time.perf_counter()
|
||||||
data = json.loads(chunk)
|
data = json.loads(chunk)
|
||||||
|
|
||||||
|
if request_id == "None" and "id" in data:
|
||||||
|
request_id = data["id"]
|
||||||
|
|
||||||
if choices := data.get("choices"):
|
if choices := data.get("choices"):
|
||||||
content = choices[0]["delta"].get("content")
|
content = choices[0]["delta"].get("content")
|
||||||
reason_content = choices[0]["delta"].get("reasoning_content")
|
reason_content = choices[0]["delta"].get("reasoning_content")
|
||||||
@@ -175,12 +179,13 @@ async def async_request_eb_openai_chat_completions(
|
|||||||
)
|
)
|
||||||
output.error = error_text or ""
|
output.error = error_text or ""
|
||||||
output.success = False
|
output.success = False
|
||||||
output.request_id = data.get("id", "")
|
|
||||||
except Exception:
|
except Exception:
|
||||||
output.success = False
|
output.success = False
|
||||||
exc_info = sys.exc_info()
|
exc_info = sys.exc_info()
|
||||||
output.error = "".join(traceback.format_exception(*exc_info))
|
output.error = "".join(traceback.format_exception(*exc_info))
|
||||||
|
|
||||||
|
output.request_id = request_id
|
||||||
|
|
||||||
# 保存失败请求结果
|
# 保存失败请求结果
|
||||||
if not output.success:
|
if not output.success:
|
||||||
with open("error_output.txt", "a") as f:
|
with open("error_output.txt", "a") as f:
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ def calculate_metrics(
|
|||||||
output_len = outputs[i].output_tokens
|
output_len = outputs[i].output_tokens
|
||||||
|
|
||||||
if not output_len:
|
if not output_len:
|
||||||
print("no output_len")
|
print("no output_len", outputs[i])
|
||||||
# We use the tokenizer to count the number of output tokens
|
# We use the tokenizer to count the number of output tokens
|
||||||
# for some serving backends instead of looking at
|
# for some serving backends instead of looking at
|
||||||
# len(outputs[i].itl) since multiple output tokens may be
|
# len(outputs[i].itl) since multiple output tokens may be
|
||||||
|
|||||||
Reference in New Issue
Block a user