[Benchmark]支持Completions接口 (#5700)

* benchmark工具支持受限解码场景指定response_format
* Update backend_request_func.py: output.success判断兼容思考内容超长截断时回复内容为空的情况
* Update benchmark_serving.py: 更新benchmark_metrics
* 支持Completions接口
* 支持Completions接口
* 支持Completions接口
* [Benchmark]支持Completions接口
* [Benchmark]支持Completions接口

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
2025-12-24 13:28:13 +08:00 · 2025-12-23 19:46:23 +08:00
parent 04c30521dd
commit 99258e19c8
5 changed files with 17 additions and 13 deletions
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -8,7 +8,7 @@
 > :bulb: If this PR is a Cherry Pick, the PR title needs to follow the format by adding the [Cherry-Pick] label at the very beginning and appending the original PR ID at the end. For example, [Cherry-Pick][CI] Add check trigger and logic(#5191)
-> :bulb: 如若此PR是Cherry Pick，PR标题需遵循格式，在最开始加上[Cherry-Pick]标签，以及最后面加上原PR ID，例如[Cherry-Pick][CI] Add check trigger and logic(#5191) 
+> :bulb: 如若此PR是Cherry Pick，PR标题需遵循格式，在最开始加上[Cherry-Pick]标签，以及最后面加上原PR ID，例如[Cherry-Pick][CI] Add check trigger and logic(#5191)
 ## Modifications
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -273,7 +273,8 @@ async def async_request_eb_openai_chat_completions(
                    # 新增metrics统计，计算首token过滤空包
                    output.metrics = metrics_summary(metrics_list, token_timestamps[1:])
-                    if output.generated_text.strip() == "":
+                    # 兼容思考内容超长截断的情况，此时回复内容为空
+                    if output.generated_text.strip() == "" and output.reasoning_content.strip() == "":
                        output.success = False
                        output.reasoning_tokens = output.output_tokens
                        output.error = "No generated text found!"
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -233,20 +233,23 @@ class EBDataset(BenchmarkDataset):
        for entry in self.data:
            if len(samples) >= num_requests:
                break
            json_data = entry
            prompt = entry["text"]
-            self.temperature = float(entry["temperature"])
+            self.temperature = float(entry.get("temperature", 1))
-            self.repetition_penalty = float(entry["penalty_score"])
+            self.repetition_penalty = float(entry.get("penalty_score", 0))
-            self.frequency_penalty = float(entry["frequency_score"])
+            self.frequency_penalty = float(entry.get("frequency_score", 0))
-            self.presence_penalty = float(entry["presence_score"])
+            self.presence_penalty = float(entry.get("presence_score", 0))
-            self.top_p = float(entry["topp"])
+            self.top_p = float(entry.get("topp", 1))
-            self.prompt_len = int(entry["input_token_num"])
+            self.prompt_len = int(entry.get("input_token_num", 0))
-            new_output_len = int(entry["max_dec_len"])
+            new_output_len = int(entry.get("max_dec_len", 0))
            if enable_multimodal_chat:
                prompt = self.apply_multimodal_chat_transformation(prompt, None)
            samples.append(
                SampleRequest(
                    no=cnt,
                    json_data=json_data,
                    prompt=prompt,
                    prompt_len=self.prompt_len,
                    history_QA=[],
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1233,6 +1233,7 @@ if __name__ == "__main__":
        type=str,
        default="EBChat",
        choices=[
            "EB",
            "EBChat",
            "random",
        ],
--- a/tests/ce/server/test_prompt_ids.py
+++ b/tests/ce/server/test_prompt_ids.py
@@ -24,9 +24,9 @@ def test_incremental_image_reasoning_consistency():
                        "type": "image_url",
                        "image_url": {
                            "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
-                        }
+                        },
                    },
-                    {"type": "text", "text": "图中的文物属于哪个年代？"}
+                    {"type": "text", "text": "图中的文物属于哪个年代？"},
                ],
            }
        ],
@@ -69,10 +69,9 @@ def test_incremental_image_reasoning_consistency():
                        "type": "image_url",
                        "image_url": {
                            "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
-                        }
+                        },
                    },
                ],
            }
        ],
        "prompt_token_ids": tokens1,