From 412097c1b8cd11220d3140a0214b40a030438de0 Mon Sep 17 00:00:00 2001
From: ophilia-lee <58770600+ophilia-lee@users.noreply.github.com>
Date: Fri, 31 Oct 2025 12:26:24 +0800
Subject: [PATCH] =?UTF-8?q?benchmark=E5=B7=A5=E5=85=B7=E6=94=AF=E6=8C=81?=
 =?UTF-8?q?=E5=8F=97=E9=99=90=E8=A7=A3=E7=A0=81=E5=9C=BA=E6=99=AF=E6=8C=87?=
 =?UTF-8?q?=E5=AE=9Aresponse=5Fformat=20(#4718)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 benchmarks/backend_request_func.py | 6 +++++-
 benchmarks/benchmark_dataset.py    | 5 ++++-
 benchmarks/benchmark_serving.py    | 6 ++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 596804331..09eedeb8f 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -51,6 +51,7 @@ class RequestFuncInput:
     ignore_eos: bool = False
     language: Optional[str] = None
     debug: bool = False
+    response_format: Optional[dict] = None
 
 
 @dataclass
@@ -93,8 +94,11 @@ async def async_request_eb_openai_chat_completions(
         "stream_options": {
             "include_usage": True,
             "continuous_usage_stats": True,
-        },
+        }
     }
+    if request_func_input.response_format:
+        payload["response_format"] = request_func_input.response_format
+
     # 超参由yaml传入
     payload.update(request_func_input.hyper_parameters)
 
diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
index 3f0078acc..0bc475062 100644
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -45,7 +45,8 @@ class SampleRequest:
     json_data: Optional[dict]
     prompt_len: int
    expected_output_len: int
-
+    response_format: Optional[dict] = None
+
 
 class BenchmarkDataset(ABC):
     """BenchmarkDataset"""
@@ -297,6 +298,7 @@ class EBChatDataset(BenchmarkDataset):
             json_data = entry
             prompt = entry["messages"][-1].get("content", "")
             history_QA = entry.get("messages", [])
+            response_format = entry.get("response_format")
             new_output_len = int(entry.get("max_tokens", 12288))
 
             if enable_multimodal_chat:
@@ -309,6 +311,7 @@ class EBChatDataset(BenchmarkDataset):
                     prompt_len=0,
                     history_QA=history_QA,
                     expected_output_len=new_output_len,
+                    response_format=response_format,
                 )
             )
             cnt += 1
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index b5dff2029..3b779d99c 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -336,6 +336,7 @@ async def benchmark(
         input_requests[0].no,
     )
     test_history_QA = input_requests[0].history_QA
+    response_format = input_requests[0].response_format
 
     test_input = RequestFuncInput(
         model=model_id,
@@ -351,6 +352,7 @@ async def benchmark(
         ignore_eos=ignore_eos,
         debug=debug,
         extra_body=extra_body,
+        response_format=response_format,
     )
     print("test_input:", test_input)
 
@@ -382,6 +384,7 @@ async def benchmark(
             logprobs=logprobs,
             ignore_eos=ignore_eos,
             extra_body=extra_body,
+            response_format=response_format,
         )
         profile_output = await request_func(request_func_input=profile_input)
         if profile_output.success:
@@ -420,6 +423,7 @@ async def benchmark(
             request.no,
         )
         history_QA = request.history_QA
+        response_format = request.response_format
 
         req_model_id, req_model_name = model_id, model_name
         if lora_modules:
@@ -440,6 +444,7 @@ async def benchmark(
             debug=debug,
             ignore_eos=ignore_eos,
             extra_body=extra_body,
+            response_format=response_format,
         )
         tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar)))
     outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
@@ -455,6 +460,7 @@ async def benchmark(
             api_url=base_url + "/stop_profile",
             output_len=test_output_len,
             logprobs=logprobs,
+            response_format=response_format,
         )
         profile_output = await request_func(request_func_input=profile_input)
         if profile_output.success: