update benchmark scripts (#4497)

2025-12-24 13:28:13 +08:00 · 2025-10-20 17:03:10 +08:00
parent b8d235445e
commit 10e85daf15
2 changed files with 7 additions and 2 deletions
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -112,6 +112,7 @@ async def async_request_eb_openai_chat_completions(
        output = RequestFuncOutput()
        output.prompt_len = 0
        output.no = request_func_input.no
+        request_id = "None"

        ttft = 0.0
        st = time.perf_counter()
@@ -131,6 +132,9 @@ async def async_request_eb_openai_chat_completions(
                            timestamp = time.perf_counter()
                            data = json.loads(chunk)

+                            if request_id == "None" and "id" in data:
+                                request_id = data["id"]
+
                            if choices := data.get("choices"):
                                content = choices[0]["delta"].get("content")
                                reason_content = choices[0]["delta"].get("reasoning_content")
@@ -175,12 +179,13 @@ async def async_request_eb_openai_chat_completions(
                    )
                    output.error = error_text or ""
                    output.success = False
-                output.request_id = data.get("id", "")
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
            output.error = "".join(traceback.format_exception(*exc_info))

+        output.request_id = request_id
+
        # 保存失败请求结果
        if not output.success:
            with open("error_output.txt", "a") as f:
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -177,7 +177,7 @@ def calculate_metrics(
            output_len = outputs[i].output_tokens

            if not output_len:
-                print("no output_len")
+                print("no output_len", outputs[i])
                # We use the tokenizer to count the number of output tokens
                # for some serving backends instead of looking at
                # len(outputs[i].itl) since multiple output tokens may be