[Benchmark]支持Completions接口 (#5700)

* benchmark工具支持受限解码场景指定response_format

* Update backend_request_func.py

output.success判断兼容思考内容超长截断时回复内容为空的情况

* Update benchmark_serving.py

更新benchmark_metrics

* 支持Completions接口

* 支持Completions接口

* 支持Completions接口

* [Benchmark]支持Completions接口

* [Benchmark]支持Completions接口

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
ophilia-lee
2025-12-23 19:46:23 +08:00
committed by GitHub
parent 04c30521dd
commit 99258e19c8
5 changed files with 17 additions and 13 deletions

View File

@@ -8,7 +8,7 @@
> :bulb: If this PR is a Cherry Pick, the PR title needs to follow the format by adding the [Cherry-Pick] label at the very beginning and appending the original PR ID at the end. For example, [Cherry-Pick][CI] Add check trigger and logic(#5191) > :bulb: If this PR is a Cherry Pick, the PR title needs to follow the format by adding the [Cherry-Pick] label at the very beginning and appending the original PR ID at the end. For example, [Cherry-Pick][CI] Add check trigger and logic(#5191)
> :bulb: 如若此PR是Cherry Pick，PR标题需遵循格式，在最开始加上[Cherry-Pick]标签，以及最后面加上原PR ID，例如[Cherry-Pick][CI] Add check trigger and logic(#5191) > :bulb: 如若此PR是Cherry Pick，PR标题需遵循格式，在最开始加上[Cherry-Pick]标签，以及最后面加上原PR ID，例如[Cherry-Pick][CI] Add check trigger and logic(#5191)
## Modifications ## Modifications

View File

@@ -273,7 +273,8 @@ async def async_request_eb_openai_chat_completions(
# 新增metrics统计计算首token过滤空包 # 新增metrics统计计算首token过滤空包
output.metrics = metrics_summary(metrics_list, token_timestamps[1:]) output.metrics = metrics_summary(metrics_list, token_timestamps[1:])
if output.generated_text.strip() == "": # 兼容思考内容超长截断的情况,此时回复内容为空
if output.generated_text.strip() == "" and output.reasoning_content.strip() == "":
output.success = False output.success = False
output.reasoning_tokens = output.output_tokens output.reasoning_tokens = output.output_tokens
output.error = "No generated text found!" output.error = "No generated text found!"

View File

@@ -233,20 +233,23 @@ class EBDataset(BenchmarkDataset):
for entry in self.data: for entry in self.data:
if len(samples) >= num_requests: if len(samples) >= num_requests:
break break
json_data = entry
prompt = entry["text"] prompt = entry["text"]
self.temperature = float(entry["temperature"]) self.temperature = float(entry.get("temperature", 1))
self.repetition_penalty = float(entry["penalty_score"]) self.repetition_penalty = float(entry.get("penalty_score", 0))
self.frequency_penalty = float(entry["frequency_score"]) self.frequency_penalty = float(entry.get("frequency_score", 0))
self.presence_penalty = float(entry["presence_score"]) self.presence_penalty = float(entry.get("presence_score", 0))
self.top_p = float(entry["topp"]) self.top_p = float(entry.get("topp", 1))
self.prompt_len = int(entry["input_token_num"]) self.prompt_len = int(entry.get("input_token_num", 0))
new_output_len = int(entry["max_dec_len"]) new_output_len = int(entry.get("max_dec_len", 0))
if enable_multimodal_chat: if enable_multimodal_chat:
prompt = self.apply_multimodal_chat_transformation(prompt, None) prompt = self.apply_multimodal_chat_transformation(prompt, None)
samples.append( samples.append(
SampleRequest( SampleRequest(
no=cnt, no=cnt,
json_data=json_data,
prompt=prompt, prompt=prompt,
prompt_len=self.prompt_len, prompt_len=self.prompt_len,
history_QA=[], history_QA=[],

View File

@@ -1233,6 +1233,7 @@ if __name__ == "__main__":
type=str, type=str,
default="EBChat", default="EBChat",
choices=[ choices=[
"EB",
"EBChat", "EBChat",
"random", "random",
], ],

View File

@@ -24,9 +24,9 @@ def test_incremental_image_reasoning_consistency():
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg" "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
} },
}, },
{"type": "text", "text": "图中的文物属于哪个年代?"} {"type": "text", "text": "图中的文物属于哪个年代?"},
], ],
} }
], ],
@@ -69,10 +69,9 @@ def test_incremental_image_reasoning_consistency():
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {
"url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg" "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
} },
}, },
], ],
} }
], ],
"prompt_token_ids": tokens1, "prompt_token_ids": tokens1,