Update benchmark_serving.py (#4438)

Dropped requests are still saved, so they remain available for result analysis.
Author: Zhang Yulong
Date: 2025-10-15 20:36:19 +08:00
Committed by: GitHub
Parent: b4b579a7ed
Commit: 7f94f063ff


@@ -463,6 +463,7 @@ async def benchmark(
     if pbar is not None:
         pbar.close()
+    benchmark_outputs = outputs
     drop_ratio = args.drop_ratio
     if 0.0 < drop_ratio < 1:
         # Per drop_ratio, discard half of the requests at the head and half at the tail; they are excluded from benchmark statistics
@@ -470,7 +471,7 @@ async def benchmark(
         drop_count = int(n * drop_ratio)
         half = drop_count // 2
         if half > 0:
-            outputs = outputs[half : n - half]
+            benchmark_outputs = outputs[half : n - half]
         # Compute the total duration from the timestamp of the last received chunk
         if len(outputs) >= 2:
@@ -479,7 +480,7 @@ async def benchmark(
             benchmark_duration = 0.0
         print(f"Request count before dropping: {n}")
-        print(f"Request count after dropping: {len(outputs)}")
+        print(f"Request count after dropping: {len(benchmark_outputs)}")
         print(f"benchmark_duration: {benchmark_duration}")
     else:
         benchmark_duration = time.perf_counter() - benchmark_start_time
@@ -487,7 +488,7 @@ async def benchmark(
     metrics, actual_output_lens = calculate_metrics(
         # input_requests=input_requests,
-        outputs=outputs,
+        outputs=benchmark_outputs,
         dur_s=benchmark_duration,
         # tokenizer=tokenizer,
         selected_percentiles=selected_percentiles,
@@ -516,7 +517,7 @@ async def benchmark(
         "total_token_throughput": metrics.total_token_throughput,
         "input_lens": [output.prompt_len for output in outputs],
         "infer_input_lens": [output.prompt_tokens for output in outputs],
-        "output_lens": actual_output_lens,
+        "output_lens": [output.output_tokens for output in outputs],
         "ttfts": [output.ttft for output in outputs],
         "itls": [output.itl for output in outputs],
         "input_texts": [input.prompt for input in input_requests],