From 5b491429885f5f5d2e6635cd97bca22323d23e4b Mon Sep 17 00:00:00 2001 From: Zhang Yulong <35552275+ZhangYulongg@users.noreply.github.com> Date: Fri, 28 Nov 2025 18:29:16 +0800 Subject: [PATCH] update (#5298) --- benchmarks/README.md | 33 +- benchmarks/backend_request_func.py | 8 + benchmarks/benchmark_dataset.py | 497 +++++++++++++++++++++++++++++ benchmarks/benchmark_serving.py | 52 +-- 4 files changed, 561 insertions(+), 29 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index bac077ffd..8cd9b9fce 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -58,7 +58,7 @@ python benchmark_serving.py \ --port 9812 \ --dataset-name EBChat \ --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \ - --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \ + --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \ --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ --metric-percentiles 80,95,99,99.9,99.95,99.99 \ --num-prompts 1 \ @@ -78,7 +78,7 @@ python benchmark_serving.py \ --port 9812 \ --dataset-name EBChat \ --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \ - --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \ + --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \ --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ --metric-percentiles 80,95,99,99.9,99.95,99.99 \ --num-prompts 2000 \ @@ -100,7 +100,7 @@ python benchmark_serving.py \ --port 9812 \ --dataset-name EBChat \ --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \ - --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \ + --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \ --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ --metric-percentiles 80,95,99,99.9,99.95,99.99 \ --num-prompts 2000 \ @@ -135,3 +135,30 @@ python benchmarks/benchmark_mtp.py \ 
--dataset-name:指定数据集类,指定为"EBChat"可读取转存的FD格式数据集 --dataset-path:测试数据集路径 ``` + +### 指定输入输出长度,构造随机纯文输入测试 + +相关参数: +- --dataset-name:指定数据集类,指定为"random"可构造随机纯文输入 +- --random-input-len:随机输入长度,对应英文单词数,默认200 +- --random-output-len:随机输出长度,默认1024 +- --random-range-ratio:输入输出长度变化范围比,[length * (1 - range_ratio), length * (1 + range_ratio)],默认0.1 + +#### 使用方式: +```bash +python benchmark_serving.py \ + --backend openai-chat \ + --model EB45T \ + --endpoint /v1/chat/completions \ + --host 0.0.0.0 \ + --port 9812 \ + --dataset-name random \ + --random-input-len 200 \ + --random-output-len 1024 \ + --random-range-ratio 0.1 \ + --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ + --metric-percentiles 80,95,99,99.9,99.95,99.99 \ + --num-prompts 2000 \ + --max-concurrency 100 \ + --save-result > infer_log.txt 2>&1 & +``` diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 2ccb4e345..6e1988239 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -52,6 +52,7 @@ class RequestFuncInput: language: Optional[str] = None debug: bool = False response_format: Optional[dict] = None + random_flag: bool = False @dataclass @@ -103,6 +104,13 @@ async def async_request_eb_openai_chat_completions( # 超参由yaml传入 payload.update(request_func_input.hyper_parameters) + # 随机输入开关 + if request_func_input.random_flag: + payload["max_tokens"] = request_func_input.output_len + metadata = payload.get("metadata", {}) + metadata["min_tokens"] = request_func_input.output_len + payload["metadata"] = metadata + if request_func_input.ignore_eos: payload["ignore_eos"] = request_func_input.ignore_eos diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index e9552c6d2..8c35867ad 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -46,6 +46,7 @@ class SampleRequest: prompt_len: int expected_output_len: int response_format: Optional[dict] = 
None + random_flag: bool = False class BenchmarkDataset(ABC): @@ -318,3 +319,499 @@ class EBChatDataset(BenchmarkDataset): self.maybe_oversample_requests(samples, num_requests) return samples + + +class RandomTextDataset(BenchmarkDataset): + """ + Generates random English words for pure text benchmarking. + """ + + # Common English words vocabulary + COMMON_WORDS = [ + "the", + "be", + "to", + "of", + "and", + "a", + "in", + "that", + "have", + "i", + "it", + "for", + "not", + "on", + "with", + "he", + "as", + "you", + "do", + "at", + "this", + "but", + "his", + "by", + "from", + "they", + "we", + "say", + "her", + "she", + "or", + "an", + "will", + "my", + "one", + "all", + "would", + "there", + "their", + "what", + "so", + "up", + "out", + "if", + "about", + "who", + "get", + "which", + "go", + "me", + "when", + "make", + "can", + "like", + "time", + "no", + "just", + "him", + "know", + "take", + "people", + "into", + "year", + "your", + "good", + "some", + "could", + "them", + "see", + "other", + "than", + "then", + "now", + "look", + "only", + "come", + "its", + "over", + "think", + "also", + "back", + "after", + "use", + "two", + "how", + "our", + "work", + "first", + "well", + "way", + "even", + "new", + "want", + "because", + "any", + "these", + "give", + "day", + "most", + "us", + "is", + "are", + "was", + "were", + "been", + "has", + "had", + "did", + "done", + "said", + "told", + "asked", + "thought", + "went", + "saw", + "looked", + "found", + "took", + "gave", + "made", + "put", + "set", + "got", + "ran", + "came", + "walked", + "stood", + "sat", + "lay", + "felt", + "heard", + "saw", + "knew", + "thought", + "understood", + "believed", + "wanted", + "needed", + "liked", + "loved", + "hated", + "feared", + "hoped", + "expected", + "planned", + "decided", + "agreed", + "disagreed", + "argued", + "discussed", + "explained", + "described", + "reported", + "announced", + "declared", + "stated", + "claimed", + "suggested", + "proposed", + "recommended", + 
"advised", + "warned", + "threatened", + "promised", + "offered", + "refused", + "denied", + "admitted", + "confessed", + "apologized", + "forgave", + "thanked", + "congratulated", + "celebrated", + "welcomed", + "greeted", + "introduced", + "presented", + "showed", + "demonstrated", + "proved", + "tested", + "examined", + "studied", + "learned", + "taught", + "trained", + "practiced", + "performed", + "played", + "worked", + "built", + "created", + "designed", + "developed", + "improved", + "changed", + "fixed", + "solved", + "completed", + "finished", + "started", + "began", + "continued", + "stopped", + "ended", + "left", + "arrived", + "departed", + "traveled", + "moved", + "stayed", + "waited", + "rested", + "slept", + "woke", + "ate", + "drank", + "cooked", + "cleaned", + "washed", + "dressed", + "undressed", + "showered", + "bathed", + "brushed", + "combed", + "shaved", + "cut", + "trimmed", + "painted", + "drew", + "wrote", + "read", + "spoke", + "listened", + "heard", + "saw", + "watched", + "looked", + "observed", + "noticed", + "recognized", + "remembered", + "forgot", + "learned", + "understood", + "knew", + "believed", + "doubted", + "wondered", + "thought", + "considered", + "decided", + "chose", + "selected", + "preferred", + "liked", + "loved", + "hated", + "feared", + "worried", + "hoped", + "expected", + "planned", + "prepared", + "organized", + "arranged", + "scheduled", + "timed", + "measured", + "counted", + "calculated", + "estimated", + "valued", + "priced", + "cost", + "paid", + "bought", + "sold", + "traded", + "exchanged", + "shared", + "divided", + "combined", + "joined", + "connected", + "attached", + "separated", + "divided", + "cut", + "broke", + "fixed", + "repaired", + "built", + "created", + "made", + "produced", + "manufactured", + "assembled", + "constructed", + "designed", + "planned", + "developed", + "improved", + "enhanced", + "changed", + "modified", + "adjusted", + "adapted", + "converted", + "transformed", + "turned", + 
"became", + "grew", + "developed", + "evolved", + "progressed", + "advanced", + "moved", + "went", + "came", + "arrived", + "departed", + "left", + "returned", + "went back", + "came back", + "arrived back", + "departed again", + "left again", + "returned again", + "went away", + "came close", + "moved away", + "approached", + "reached", + "arrived at", + "departed from", + "left from", + "returned to", + "went to", + "came from", + "traveled to", + "traveled from", + "moved to", + "moved from", + "stayed at", + "remained at", + "waited for", + "rested at", + "slept at", + "woke up at", + "ate at", + "drank at", + "cooked at", + "cleaned at", + "washed at", + "dressed at", + "undressed at", + "showered at", + "bathed at", + "brushed at", + "combed at", + "shaved at", + "cut at", + "trimmed at", + "painted at", + "drew at", + "wrote at", + "read at", + "spoke at", + "listened at", + "heard at", + "saw at", + "watched at", + "looked at", + "observed at", + "noticed at", + "recognized at", + "remembered at", + "forgot at", + "learned at", + "understood at", + "knew at", + "believed at", + "doubted at", + "wondered at", + "thought at", + "considered at", + "decided at", + "chose at", + "selected at", + "preferred at", + "liked at", + "loved at", + "hated at", + "feared at", + "worried at", + "hoped at", + "expected at", + "planned at", + "prepared at", + "organized at", + "arranged at", + "scheduled at", + "timed at", + "measured at", + "counted at", + "calculated at", + "estimated at", + "valued at", + "priced at", + "cost at", + "paid at", + "bought at", + "sold at", + "traded at", + "exchanged at", + "shared at", + "divided at", + "combined at", + "joined at", + "connected at", + "attached at", + "separated at", + "divided at", + "cut at", + "broke at", + "fixed at", + "repaired at", + "built at", + "created at", + "made at", + "produced at", + "manufactured at", + ] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def sample( + self, + 
num_requests: int, + lora_path: Optional[str] = None, + max_loras: Optional[int] = None, + random_input_len: Optional[int] = None, + random_output_len: Optional[int] = None, + random_range_ratio: Optional[float] = None, + enable_multimodal_chat: bool = False, + **kwargs, + ) -> list: + samples = [] + + def sample_len(base_len: int, ratio: float) -> int: + if base_len is None: + return None + if ratio is None or ratio <= 0: + return base_len + lo = max(1, int(base_len * (1 - ratio))) + hi = int(base_len * (1 + ratio)) + return random.randint(lo, hi) + + for i in range(1, num_requests + 1): + # [length * (1 - range_ratio), length * (1 + range_ratio)] + sampled_input_len = sample_len(random_input_len, random_range_ratio) + sampled_output_len = sample_len(random_output_len, random_range_ratio) + + words = [random.choice(self.COMMON_WORDS) for _ in range(sampled_input_len)] + prompt_text = " ".join(words) + + data = { + "messages": [{"role": "user", "content": prompt_text}], + } + + samples.append( + SampleRequest( + no=i, + json_data=data, + prompt=prompt_text, + prompt_len=sampled_input_len, + history_QA=data["messages"], + expected_output_len=sampled_output_len, + random_flag=True, + ) + ) + return samples diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index ca721e9cb..b9e61ef7a 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -39,7 +39,7 @@ from backend_request_func import ( RequestFuncInput, RequestFuncOutput, ) -from benchmark_dataset import EBChatDataset, EBDataset, SampleRequest +from benchmark_dataset import EBChatDataset, EBDataset, RandomTextDataset, SampleRequest from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json from tqdm.asyncio import tqdm @@ -337,6 +337,7 @@ async def benchmark( ) test_history_QA = input_requests[0].history_QA response_format = input_requests[0].response_format + random_flag = input_requests[0].random_flag test_input = RequestFuncInput( 
model=model_id, @@ -353,6 +354,7 @@ async def benchmark( debug=debug, extra_body=extra_body, response_format=response_format, + random_flag=random_flag, ) print("test_input:", test_input) @@ -385,6 +387,7 @@ async def benchmark( ignore_eos=ignore_eos, extra_body=extra_body, response_format=response_format, + random_flag=random_flag, ) profile_output = await request_func(request_func_input=profile_input) if profile_output.success: @@ -424,6 +427,7 @@ async def benchmark( ) history_QA = request.history_QA response_format = request.response_format + random_flag = request.random_flag req_model_id, req_model_name = model_id, model_name if lora_modules: @@ -445,6 +449,7 @@ async def benchmark( ignore_eos=ignore_eos, extra_body=extra_body, response_format=response_format, + random_flag=random_flag, ) tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar))) outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks) @@ -461,6 +466,7 @@ async def benchmark( output_len=test_output_len, logprobs=logprobs, response_format=response_format, + random_flag=random_flag, ) profile_output = await request_func(request_func_input=profile_input) if profile_output.success: @@ -498,6 +504,12 @@ async def benchmark( benchmark_duration = time.perf_counter() - benchmark_start_time print(f"benchmark_duration: {benchmark_duration} 秒") + if random_flag: + print("指定随机输入输出长度测试") + print(f"random_input_len: {args.random_input_len}") + print(f"random_output_len: {args.random_output_len}") + print(f"random_range_ratio: {args.random_range_ratio}") + metrics, actual_output_lens = calculate_metrics( # input_requests=input_requests, outputs=benchmark_outputs, @@ -866,6 +878,12 @@ def main(args: argparse.Namespace): num_requests=args.num_prompts, output_len=args.sharegpt_output_len, ), + "random": lambda: RandomTextDataset().sample( + num_requests=args.num_prompts, + random_input_len=args.random_input_len, + random_output_len=args.random_output_len, + 
random_range_ratio=args.random_range_ratio, + ), } try: @@ -1021,15 +1039,10 @@ if __name__ == "__main__": parser.add_argument( "--dataset-name", type=str, - default="sharegpt", + default="EBChat", choices=[ - "sharegpt", - "burstgpt", - "sonnet", - "random", - "hf", - "EB", "EBChat", + "random", ], help="Name of the dataset to benchmark on.", ) @@ -1247,37 +1260,24 @@ if __name__ == "__main__": random_group.add_argument( "--random-input-len", type=int, - default=1024, - help="Number of input tokens per request, used only for random sampling.", + default=200, + help="Number of input English words per request, used only for random-text dataset.", ) random_group.add_argument( "--random-output-len", type=int, - default=128, - help="Number of output tokens per request, used only for random sampling.", + default=1024, + help="Number of output tokens per request, used both for random and random-text datasets.", ) random_group.add_argument( "--random-range-ratio", type=float, - default=0.0, + default=0.1, help="Range ratio for sampling input/output length, " "used only for random sampling. Must be in the range [0, 1) to define " "a symmetric sampling range" "[length * (1 - range_ratio), length * (1 + range_ratio)].", ) - random_group.add_argument( - "--random-prefix-len", - type=int, - default=0, - help=( - "Number of fixed prefix tokens before the random context " - "in a request. " - "The total input length is the sum of `random-prefix-len` and " - "a random " - "context length sampled from [input_len * (1 - range_ratio), " - "input_len * (1 + range_ratio)]." - ), - ) hf_group = parser.add_argument_group("hf dataset options") hf_group.add_argument("--hf-subset", type=str, default=None, help="Subset of the HF dataset.")