benchmark工具适配SGLang框架 (#4607)

* benchmark工具适配SGLang框架
* benchmark工具适配SGLang框架
* benchmark工具适配SGLang框架
2025-12-24 13:28:13 +08:00 · 2025-10-27 18:52:56 +08:00
parent c91c5040c4
commit 70aa7423f8
3 changed files with 17 additions and 13 deletions
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -128,13 +128,13 @@ async def async_request_eb_openai_chat_completions(

                        chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
                        if chunk != "[DONE]":
-                            # print("####chunk:", chunk, type(chunk))
+                            #print("####chunk:", chunk, type(chunk))
                            timestamp = time.perf_counter()
                            data = json.loads(chunk)

                            if request_id == "None" and "id" in data:
                                request_id = data["id"]
-
+                            
                            if choices := data.get("choices"):
                                content = choices[0]["delta"].get("content")
                                reason_content = choices[0]["delta"].get("reasoning_content")
@@ -143,9 +143,12 @@ async def async_request_eb_openai_chat_completions(
                                    ttft = timestamp - st
                                    output.ttft = ttft
                                    # cached_tokens
-                                    output.prompt_len = (
-                                        data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0)
-                                    )
+                                    if data["usage"] and data["usage"].get("prompt_tokens_details", {}):
+                                        output.prompt_len = (
+                                            data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0)
+                                        )
+                                    else:
+                                        output.prompt_len = 0

                                # Decoding phase
                                else:
@@ -157,6 +160,7 @@ async def async_request_eb_openai_chat_completions(
                            elif usage := data.get("usage", {}):
                                output.output_tokens = usage.get("completion_tokens", 0)
                                output.prompt_tokens = usage.get("prompt_tokens", 0)
+                            

                            most_recent_timestamp = timestamp