From 99258e19c8756fc0e0a5156dbb293b5a3e87c43b Mon Sep 17 00:00:00 2001 From: ophilia-lee <58770600+ophilia-lee@users.noreply.github.com> Date: Tue, 23 Dec 2025 19:46:23 +0800 Subject: [PATCH] =?UTF-8?q?[Benchmark]=E6=94=AF=E6=8C=81Completions?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=20(#5700)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * benchmark工具支持受限解码场景指定response_format * Update backend_request_func.py output.success判断兼容思考内容超长截断时回复内容为空的情况 * Update benchmark_serving.py 更新benchmark_metrics * 支持Completions接口 * 支持Completions接口 * 支持Completions接口 * [Benchmark]支持Completions接口 * [Benchmark]支持Completions接口 --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> --- .github/pull_request_template.md | 2 +- benchmarks/backend_request_func.py | 3 ++- benchmarks/benchmark_dataset.py | 17 ++++++++++------- benchmarks/benchmark_serving.py | 1 + tests/ce/server/test_prompt_ids.py | 7 +++---- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index ba1712559..7f3c72aa8 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -8,7 +8,7 @@ > :bulb: If this PR is a Cherry Pick, the PR title needs to follow the format by adding the [Cherry-Pick] label at the very beginning and appending the original PR ID at the end. For example, [Cherry-Pick][CI] Add check trigger and logic(#5191) -> :bulb: 如若此PR是Cherry Pick,PR标题需遵循格式,在最开始加上[Cherry-Pick]标签,以及最后面加上原PR ID,例如[Cherry-Pick][CI] Add check trigger and logic(#5191) +> :bulb: 如若此PR是Cherry Pick,PR标题需遵循格式,在最开始加上[Cherry-Pick]标签,以及最后面加上原PR ID,例如[Cherry-Pick][CI] Add check trigger and logic(#5191) ## Modifications diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 40b719e06..d68180fc1 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -273,7 +273,8 @@ async def async_request_eb_openai_chat_completions( # 新增metrics统计,计算首token过滤空包 output.metrics = metrics_summary(metrics_list, token_timestamps[1:]) - if output.generated_text.strip() == "": + # 兼容思考内容超长截断的情况,此时回复内容为空 + if output.generated_text.strip() == "" and output.reasoning_content.strip() == "": output.success = False output.reasoning_tokens = output.output_tokens output.error = "No generated text found!" diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index 8c35867ad..ab7c8deb3 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -233,20 +233,23 @@ class EBDataset(BenchmarkDataset): for entry in self.data: if len(samples) >= num_requests: break + json_data = entry + prompt = entry["text"] - self.temperature = float(entry["temperature"]) - self.repetition_penalty = float(entry["penalty_score"]) - self.frequency_penalty = float(entry["frequency_score"]) - self.presence_penalty = float(entry["presence_score"]) - self.top_p = float(entry["topp"]) - self.prompt_len = int(entry["input_token_num"]) - new_output_len = int(entry["max_dec_len"]) + self.temperature = float(entry.get("temperature", 1)) + self.repetition_penalty = float(entry.get("penalty_score", 0)) + self.frequency_penalty = float(entry.get("frequency_score", 0)) + self.presence_penalty = float(entry.get("presence_score", 0)) + self.top_p = float(entry.get("topp", 1)) + self.prompt_len = int(entry.get("input_token_num", 0)) + new_output_len = int(entry.get("max_dec_len", 0)) if enable_multimodal_chat: prompt = self.apply_multimodal_chat_transformation(prompt, None) samples.append( SampleRequest( no=cnt, + json_data=json_data, prompt=prompt, prompt_len=self.prompt_len, history_QA=[], diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index e48b65b4b..a87c8f8ac 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -1233,6 +1233,7 @@ if __name__ == "__main__": type=str, default="EBChat", choices=[ + "EB", "EBChat", "random", ], diff --git a/tests/ce/server/test_prompt_ids.py b/tests/ce/server/test_prompt_ids.py index e49b974c6..be05d4c18 100644 --- a/tests/ce/server/test_prompt_ids.py +++ b/tests/ce/server/test_prompt_ids.py @@ -24,9 +24,9 @@ def test_incremental_image_reasoning_consistency(): "type": "image_url", "image_url": { "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg" - } + }, }, - {"type": "text", "text": "图中的文物属于哪个年代?"} + {"type": "text", "text": "图中的文物属于哪个年代?"}, ], } ], @@ -69,10 +69,9 @@ def test_incremental_image_reasoning_consistency(): "type": "image_url", "image_url": { "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg" - } + }, }, ], - } ], "prompt_token_ids": tokens1,