mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Benchmark]支持Completions接口 (#5700)
* benchmark工具支持受限解码场景指定response_format
* Update backend_request_func.py
  output.success判断兼容思考内容超长截断时回复内容为空的情况
* Update benchmark_serving.py
  更新benchmark_metrics
* 支持Completions接口
* 支持Completions接口
* 支持Completions接口
* [Benchmark]支持Completions接口
* [Benchmark]支持Completions接口

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
2
.github/pull_request_template.md
vendored
2
.github/pull_request_template.md
vendored
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
> :bulb: If this PR is a Cherry Pick, the PR title needs to follow the format by adding the [Cherry-Pick] label at the very beginning and appending the original PR ID at the end. For example, [Cherry-Pick][CI] Add check trigger and logic(#5191)
|
> :bulb: If this PR is a Cherry Pick, the PR title needs to follow the format by adding the [Cherry-Pick] label at the very beginning and appending the original PR ID at the end. For example, [Cherry-Pick][CI] Add check trigger and logic(#5191)
|
||||||
|
|
||||||
> :bulb: 如若此PR是Cherry Pick,PR标题需遵循格式,在最开始加上[Cherry-Pick]标签,以及最后面加上原PR ID,例如[Cherry-Pick][CI] Add check trigger and logic(#5191)
|
> :bulb: 如若此PR是Cherry Pick,PR标题需遵循格式,在最开始加上[Cherry-Pick]标签,以及最后面加上原PR ID,例如[Cherry-Pick][CI] Add check trigger and logic(#5191)
|
||||||
|
|
||||||
## Modifications
|
## Modifications
|
||||||
|
|
||||||
|
|||||||
@@ -273,7 +273,8 @@ async def async_request_eb_openai_chat_completions(
|
|||||||
# 新增metrics统计,计算首token过滤空包
|
# 新增metrics统计,计算首token过滤空包
|
||||||
output.metrics = metrics_summary(metrics_list, token_timestamps[1:])
|
output.metrics = metrics_summary(metrics_list, token_timestamps[1:])
|
||||||
|
|
||||||
if output.generated_text.strip() == "":
|
# 兼容思考内容超长截断的情况,此时回复内容为空
|
||||||
|
if output.generated_text.strip() == "" and output.reasoning_content.strip() == "":
|
||||||
output.success = False
|
output.success = False
|
||||||
output.reasoning_tokens = output.output_tokens
|
output.reasoning_tokens = output.output_tokens
|
||||||
output.error = "No generated text found!"
|
output.error = "No generated text found!"
|
||||||
|
|||||||
@@ -233,20 +233,23 @@ class EBDataset(BenchmarkDataset):
|
|||||||
for entry in self.data:
|
for entry in self.data:
|
||||||
if len(samples) >= num_requests:
|
if len(samples) >= num_requests:
|
||||||
break
|
break
|
||||||
|
json_data = entry
|
||||||
|
|
||||||
prompt = entry["text"]
|
prompt = entry["text"]
|
||||||
self.temperature = float(entry["temperature"])
|
self.temperature = float(entry.get("temperature", 1))
|
||||||
self.repetition_penalty = float(entry["penalty_score"])
|
self.repetition_penalty = float(entry.get("penalty_score", 0))
|
||||||
self.frequency_penalty = float(entry["frequency_score"])
|
self.frequency_penalty = float(entry.get("frequency_score", 0))
|
||||||
self.presence_penalty = float(entry["presence_score"])
|
self.presence_penalty = float(entry.get("presence_score", 0))
|
||||||
self.top_p = float(entry["topp"])
|
self.top_p = float(entry.get("topp", 1))
|
||||||
self.prompt_len = int(entry["input_token_num"])
|
self.prompt_len = int(entry.get("input_token_num", 0))
|
||||||
new_output_len = int(entry["max_dec_len"])
|
new_output_len = int(entry.get("max_dec_len", 0))
|
||||||
|
|
||||||
if enable_multimodal_chat:
|
if enable_multimodal_chat:
|
||||||
prompt = self.apply_multimodal_chat_transformation(prompt, None)
|
prompt = self.apply_multimodal_chat_transformation(prompt, None)
|
||||||
samples.append(
|
samples.append(
|
||||||
SampleRequest(
|
SampleRequest(
|
||||||
no=cnt,
|
no=cnt,
|
||||||
|
json_data=json_data,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
prompt_len=self.prompt_len,
|
prompt_len=self.prompt_len,
|
||||||
history_QA=[],
|
history_QA=[],
|
||||||
|
|||||||
@@ -1233,6 +1233,7 @@ if __name__ == "__main__":
|
|||||||
type=str,
|
type=str,
|
||||||
default="EBChat",
|
default="EBChat",
|
||||||
choices=[
|
choices=[
|
||||||
|
"EB",
|
||||||
"EBChat",
|
"EBChat",
|
||||||
"random",
|
"random",
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -24,9 +24,9 @@ def test_incremental_image_reasoning_consistency():
|
|||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {
|
||||||
"url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
|
"url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{"type": "text", "text": "图中的文物属于哪个年代?"}
|
{"type": "text", "text": "图中的文物属于哪个年代?"},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -69,10 +69,9 @@ def test_incremental_image_reasoning_consistency():
|
|||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {
|
||||||
"url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
|
"url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"prompt_token_ids": tokens1,
|
"prompt_token_ids": tokens1,
|
||||||
|
|||||||
Reference in New Issue
Block a user