benchmark工具适配SGLang框架 (#4607)

* benchmark工具适配SGLang框架

* benchmark工具适配SGLang框架

* benchmark工具适配SGLang框架
This commit is contained in:
ophilia-lee
2025-10-27 18:52:56 +08:00
committed by GitHub
parent c91c5040c4
commit 70aa7423f8
3 changed files with 17 additions and 13 deletions

View File

@@ -128,13 +128,13 @@ async def async_request_eb_openai_chat_completions(
chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
if chunk != "[DONE]":
# print("####chunk:", chunk, type(chunk))
#print("####chunk:", chunk, type(chunk))
timestamp = time.perf_counter()
data = json.loads(chunk)
if request_id == "None" and "id" in data:
request_id = data["id"]
if choices := data.get("choices"):
content = choices[0]["delta"].get("content")
reason_content = choices[0]["delta"].get("reasoning_content")
@@ -143,9 +143,12 @@ async def async_request_eb_openai_chat_completions(
ttft = timestamp - st
output.ttft = ttft
# cached_tokens
output.prompt_len = (
data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0)
)
if data["usage"] and data["usage"].get("prompt_tokens_details", {}):
output.prompt_len = (
data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0)
)
else:
output.prompt_len = 0
# Decoding phase
else:
@@ -157,6 +160,7 @@ async def async_request_eb_openai_chat_completions(
elif usage := data.get("usage", {}):
output.output_tokens = usage.get("completion_tokens", 0)
output.prompt_tokens = usage.get("prompt_tokens", 0)
most_recent_timestamp = timestamp

View File

@@ -635,7 +635,7 @@ def benchmark_metrics(
goodput_config_dict = check_goodput_args(args)
metrics, actual_output_lens = calculate_metrics(
input_requests=input_requests,
# input_requests=input_requests,
outputs=outputs,
dur_s=benchmark_duration,
selected_percentiles=selected_percentiles,

View File

@@ -1,11 +1,11 @@
top_p: 1.0
temperature: 1.0
metadata:
min_tokens: 1
max_tokens: 30721
top_p: 0.8
temperature: 0.8
max_tokens: 12288
repetition_penalty: 1.0
frequency_penalty: 0
presence_penalty: 0
skip_special_tokens: false
metadata:
enable_thinking: false
min_tokens: 1
chat_template_kwargs:
enable_thinking: true
enable_thinking: false