mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-16 13:41:30 +08:00
Update benchmark_serving.py (#4438)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
丢弃的请求依旧保存,用于结果分析
This commit is contained in:
@@ -463,6 +463,7 @@ async def benchmark(
|
||||
if pbar is not None:
|
||||
pbar.close()
|
||||
|
||||
+benchmark_outputs = outputs
|
||||
drop_ratio = args.drop_ratio
|
||||
if 0.0 < drop_ratio < 1:
|
||||
# 按drop_ratio头尾各舍弃一半请求,不计入benchmark统计
|
||||
@@ -470,7 +471,7 @@ async def benchmark(
|
||||
drop_count = int(n * drop_ratio)
|
||||
half = drop_count // 2
|
||||
if half > 0:
|
||||
-outputs = outputs[half : n - half]
|
||||
+benchmark_outputs = outputs[half : n - half]
|
||||
|
||||
# 根据收到最后一个chunk的时间戳计算总时长
|
||||
if len(outputs) >= 2:
|
||||
@@ -479,7 +480,7 @@ async def benchmark(
|
||||
benchmark_duration = 0.0
|
||||
|
||||
print(f"丢弃前数量: {n}")
|
||||
-print(f"丢弃后数量: {len(outputs)}")
|
||||
+print(f"丢弃后数量: {len(benchmark_outputs)}")
|
||||
print(f"benchmark_duration: {benchmark_duration} 秒")
|
||||
else:
|
||||
benchmark_duration = time.perf_counter() - benchmark_start_time
|
||||
@@ -487,7 +488,7 @@ async def benchmark(
|
||||
|
||||
metrics, actual_output_lens = calculate_metrics(
|
||||
# input_requests=input_requests,
|
||||
-outputs=outputs,
|
||||
+outputs=benchmark_outputs,
|
||||
dur_s=benchmark_duration,
|
||||
# tokenizer=tokenizer,
|
||||
selected_percentiles=selected_percentiles,
|
||||
@@ -516,7 +517,7 @@ async def benchmark(
|
||||
"total_token_throughput": metrics.total_token_throughput,
|
||||
"input_lens": [output.prompt_len for output in outputs],
|
||||
"infer_input_lens": [output.prompt_tokens for output in outputs],
|
||||
-"output_lens": actual_output_lens,
|
||||
+"output_lens": [output.output_tokens for output in outputs],
|
||||
"ttfts": [output.ttft for output in outputs],
|
||||
"itls": [output.itl for output in outputs],
|
||||
"input_texts": [input.prompt for input in input_requests],
|
||||
|
Reference in New Issue
Block a user