mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
benchmark工具适配SGLang框架 (#4607)
* benchmark工具适配SGLang框架 * benchmark工具适配SGLang框架 * benchmark工具适配SGLang框架
This commit is contained in:
@@ -128,13 +128,13 @@ async def async_request_eb_openai_chat_completions(
|
||||
|
||||
chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
|
||||
if chunk != "[DONE]":
|
||||
# print("####chunk:", chunk, type(chunk))
|
||||
#print("####chunk:", chunk, type(chunk))
|
||||
timestamp = time.perf_counter()
|
||||
data = json.loads(chunk)
|
||||
|
||||
if request_id == "None" and "id" in data:
|
||||
request_id = data["id"]
|
||||
|
||||
|
||||
if choices := data.get("choices"):
|
||||
content = choices[0]["delta"].get("content")
|
||||
reason_content = choices[0]["delta"].get("reasoning_content")
|
||||
@@ -143,9 +143,12 @@ async def async_request_eb_openai_chat_completions(
|
||||
ttft = timestamp - st
|
||||
output.ttft = ttft
|
||||
# cached_tokens
|
||||
output.prompt_len = (
|
||||
data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0)
|
||||
)
|
||||
if data["usage"] and data["usage"].get("prompt_tokens_details", {}):
|
||||
output.prompt_len = (
|
||||
data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0)
|
||||
)
|
||||
else:
|
||||
output.prompt_len = 0
|
||||
|
||||
# Decoding phase
|
||||
else:
|
||||
@@ -157,6 +160,7 @@ async def async_request_eb_openai_chat_completions(
|
||||
elif usage := data.get("usage", {}):
|
||||
output.output_tokens = usage.get("completion_tokens", 0)
|
||||
output.prompt_tokens = usage.get("prompt_tokens", 0)
|
||||
|
||||
|
||||
most_recent_timestamp = timestamp
|
||||
|
||||
|
||||
@@ -635,7 +635,7 @@ def benchmark_metrics(
|
||||
goodput_config_dict = check_goodput_args(args)
|
||||
|
||||
metrics, actual_output_lens = calculate_metrics(
|
||||
input_requests=input_requests,
|
||||
# input_requests=input_requests,
|
||||
outputs=outputs,
|
||||
dur_s=benchmark_duration,
|
||||
selected_percentiles=selected_percentiles,
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
top_p: 1.0
|
||||
temperature: 1.0
|
||||
metadata:
|
||||
min_tokens: 1
|
||||
max_tokens: 30721
|
||||
top_p: 0.8
|
||||
temperature: 0.8
|
||||
max_tokens: 12288
|
||||
repetition_penalty: 1.0
|
||||
frequency_penalty: 0
|
||||
presence_penalty: 0
|
||||
skip_special_tokens: false
|
||||
metadata:
|
||||
enable_thinking: false
|
||||
min_tokens: 1
|
||||
chat_template_kwargs:
|
||||
enable_thinking: true
|
||||
enable_thinking: false
|
||||
Reference in New Issue
Block a user