This commit is contained in:
Zhang Yulong
2025-11-28 18:29:16 +08:00
committed by GitHub
parent a535050b11
commit 5b49142988
4 changed files with 561 additions and 29 deletions

View File

@@ -58,7 +58,7 @@ python benchmark_serving.py \
--port 9812 \
--dataset-name EBChat \
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 1 \
@@ -78,7 +78,7 @@ python benchmark_serving.py \
--port 9812 \
--dataset-name EBChat \
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 2000 \
@@ -100,7 +100,7 @@ python benchmark_serving.py \
--port 9812 \
--dataset-name EBChat \
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 2000 \
@@ -135,3 +135,30 @@ python benchmarks/benchmark_mtp.py \
--dataset-name:指定数据集类,指定为"EBChat"时可读取转存的FD格式数据集
--dataset-path:测试数据集路径
```
### 指定输入输出长度,构造随机纯文输入测试
相关参数:
- --dataset-name:指定数据集类,指定为"random"时可构造随机纯文输入
- --random-input-len:随机输入长度,对应英文单词数,默认200
- --random-output-len:随机输出长度,默认1024
- --random-range-ratio:输入输出长度变化范围比 [length * (1 - range_ratio), length * (1 + range_ratio)],默认0.1
#### 使用方式:
```bash
python benchmark_serving.py \
--backend openai-chat \
--model EB45T \
--endpoint /v1/chat/completions \
--host 0.0.0.0 \
--port 9812 \
--dataset-name random \
--random-input-len 200 \
--random-output-len 1024 \
--random-range-ratio 0.1 \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 2000 \
--max-concurrency 100 \
--save-result > infer_log.txt 2>&1 &
```

View File

@@ -52,6 +52,7 @@ class RequestFuncInput:
language: Optional[str] = None
debug: bool = False
response_format: Optional[dict] = None
random_flag: bool = False
@dataclass
@@ -103,6 +104,13 @@ async def async_request_eb_openai_chat_completions(
# 超参由yaml传入
payload.update(request_func_input.hyper_parameters)
# 随机输入开关
if request_func_input.random_flag:
payload["max_tokens"] = request_func_input.output_len
metadata = payload.get("metadata", {})
metadata["min_tokens"] = request_func_input.output_len
payload["metadata"] = metadata
if request_func_input.ignore_eos:
payload["ignore_eos"] = request_func_input.ignore_eos

View File

@@ -46,6 +46,7 @@ class SampleRequest:
prompt_len: int
expected_output_len: int
response_format: Optional[dict] = None
random_flag: bool = False
class BenchmarkDataset(ABC):
@@ -318,3 +319,499 @@ class EBChatDataset(BenchmarkDataset):
self.maybe_oversample_requests(samples, num_requests)
return samples
class RandomTextDataset(BenchmarkDataset):
    """
    Generates prompts of random English words for pure-text benchmarking.

    Each sampled request is a single-turn chat message whose content is
    ``random_input_len`` draws from :attr:`COMMON_WORDS` joined by spaces.
    Input/output lengths are jittered uniformly within
    ``[length * (1 - ratio), length * (1 + ratio)]``.
    """

    # Common English words vocabulary.
    # Wrapped in ``list(dict.fromkeys(...))`` to drop accidental duplicates
    # ("saw", "thought", "heard", "divided at", ...) while preserving
    # first-seen order — duplicates would silently bias random.choice().
    # NOTE(review): some entries are multi-word phrases ("went back",
    # "woke up at"), so a prompt built from N draws may contain slightly
    # more than N whitespace-separated words; prompt_len counts draws.
    COMMON_WORDS = list(
        dict.fromkeys(
            [
                "the", "be", "to", "of", "and", "a", "in", "that",
                "have", "i", "it", "for", "not", "on", "with", "he",
                "as", "you", "do", "at", "this", "but", "his", "by",
                "from", "they", "we", "say", "her", "she", "or", "an",
                "will", "my", "one", "all", "would", "there", "their", "what",
                "so", "up", "out", "if", "about", "who", "get", "which",
                "go", "me", "when", "make", "can", "like", "time", "no",
                "just", "him", "know", "take", "people", "into", "year", "your",
                "good", "some", "could", "them", "see", "other", "than", "then",
                "now", "look", "only", "come", "its", "over", "think", "also",
                "back", "after", "use", "two", "how", "our", "work", "first",
                "well", "way", "even", "new", "want", "because", "any", "these",
                "give", "day", "most", "us", "is", "are", "was", "were",
                "been", "has", "had", "did", "done", "said", "told", "asked",
                "thought", "went", "saw", "looked", "found", "took", "gave", "made",
                "put", "set", "got", "ran", "came", "walked", "stood", "sat",
                "lay", "felt", "heard", "saw", "knew", "thought", "understood", "believed",
                "wanted", "needed", "liked", "loved", "hated", "feared", "hoped", "expected",
                "planned", "decided", "agreed", "disagreed", "argued", "discussed", "explained", "described",
                "reported", "announced", "declared", "stated", "claimed", "suggested", "proposed", "recommended",
                "advised", "warned", "threatened", "promised", "offered", "refused", "denied", "admitted",
                "confessed", "apologized", "forgave", "thanked", "congratulated", "celebrated", "welcomed", "greeted",
                "introduced", "presented", "showed", "demonstrated", "proved", "tested", "examined", "studied",
                "learned", "taught", "trained", "practiced", "performed", "played", "worked", "built",
                "created", "designed", "developed", "improved", "changed", "fixed", "solved", "completed",
                "finished", "started", "began", "continued", "stopped", "ended", "left", "arrived",
                "departed", "traveled", "moved", "stayed", "waited", "rested", "slept", "woke",
                "ate", "drank", "cooked", "cleaned", "washed", "dressed", "undressed", "showered",
                "bathed", "brushed", "combed", "shaved", "cut", "trimmed", "painted", "drew",
                "wrote", "read", "spoke", "listened", "heard", "saw", "watched", "looked",
                "observed", "noticed", "recognized", "remembered", "forgot", "learned", "understood", "knew",
                "believed", "doubted", "wondered", "thought", "considered", "decided", "chose", "selected",
                "preferred", "liked", "loved", "hated", "feared", "worried", "hoped", "expected",
                "planned", "prepared", "organized", "arranged", "scheduled", "timed", "measured", "counted",
                "calculated", "estimated", "valued", "priced", "cost", "paid", "bought", "sold",
                "traded", "exchanged", "shared", "divided", "combined", "joined", "connected", "attached",
                "separated", "divided", "cut", "broke", "fixed", "repaired", "built", "created",
                "made", "produced", "manufactured", "assembled", "constructed", "designed", "planned", "developed",
                "improved", "enhanced", "changed", "modified", "adjusted", "adapted", "converted", "transformed",
                "turned", "became", "grew", "developed", "evolved", "progressed", "advanced", "moved",
                "went", "came", "arrived", "departed", "left", "returned",
                "went back", "came back", "arrived back", "departed again", "left again", "returned again",
                "went away", "came close", "moved away", "approached", "reached",
                "arrived at", "departed from", "left from", "returned to", "went to", "came from",
                "traveled to", "traveled from", "moved to", "moved from", "stayed at", "remained at",
                "waited for", "rested at", "slept at", "woke up at", "ate at", "drank at",
                "cooked at", "cleaned at", "washed at", "dressed at", "undressed at", "showered at",
                "bathed at", "brushed at", "combed at", "shaved at", "cut at", "trimmed at",
                "painted at", "drew at", "wrote at", "read at", "spoke at", "listened at",
                "heard at", "saw at", "watched at", "looked at", "observed at", "noticed at",
                "recognized at", "remembered at", "forgot at", "learned at", "understood at", "knew at",
                "believed at", "doubted at", "wondered at", "thought at", "considered at", "decided at",
                "chose at", "selected at", "preferred at", "liked at", "loved at", "hated at",
                "feared at", "worried at", "hoped at", "expected at", "planned at", "prepared at",
                "organized at", "arranged at", "scheduled at", "timed at", "measured at", "counted at",
                "calculated at", "estimated at", "valued at", "priced at", "cost at", "paid at",
                "bought at", "sold at", "traded at", "exchanged at", "shared at", "divided at",
                "combined at", "joined at", "connected at", "attached at", "separated at", "divided at",
                "cut at", "broke at", "fixed at", "repaired at", "built at", "created at",
                "made at", "produced at", "manufactured at",
            ]
        )
    )

    def __init__(self, **kwargs):
        """Forward all keyword arguments to :class:`BenchmarkDataset`."""
        super().__init__(**kwargs)

    def sample(
        self,
        num_requests: int,
        lora_path: Optional[str] = None,
        max_loras: Optional[int] = None,
        random_input_len: Optional[int] = None,
        random_output_len: Optional[int] = None,
        random_range_ratio: Optional[float] = None,
        enable_multimodal_chat: bool = False,
        **kwargs,
    ) -> list:
        """
        Build ``num_requests`` random-text requests.

        Args:
            num_requests: Number of requests to generate.
            lora_path / max_loras / enable_multimodal_chat: Unused; kept for
                interface compatibility with sibling dataset classes.
            random_input_len: Base number of vocabulary draws per prompt.
                Required — raises ValueError when missing.
            random_output_len: Base expected output length; may be None, in
                which case ``expected_output_len`` is left as None.
            random_range_ratio: Symmetric jitter ratio in [0, 1); None or
                <= 0 disables jitter.

        Returns:
            list[SampleRequest] with ``random_flag=True`` on every entry.

        Raises:
            ValueError: If ``random_input_len`` is None.
        """
        if random_input_len is None:
            # The previous implementation crashed later with an opaque
            # ``TypeError: 'NoneType' object cannot be interpreted as an
            # integer`` from range(); fail fast with a clear message instead.
            raise ValueError("random_input_len is required for the random-text dataset")

        def _jitter(base_len: Optional[int], ratio: Optional[float]) -> Optional[int]:
            # Uniformly sample within [base*(1-ratio), base*(1+ratio)],
            # clamped to at least 1 word. None/non-positive ratio → no jitter.
            if base_len is None:
                return None
            if ratio is None or ratio <= 0:
                return base_len
            lo = max(1, int(base_len * (1 - ratio)))
            hi = int(base_len * (1 + ratio))
            return random.randint(lo, hi)

        samples = []
        for request_no in range(1, num_requests + 1):
            input_len = _jitter(random_input_len, random_range_ratio)
            output_len = _jitter(random_output_len, random_range_ratio)
            prompt_text = " ".join(
                random.choice(self.COMMON_WORDS) for _ in range(input_len)
            )
            data = {
                "messages": [{"role": "user", "content": prompt_text}],
            }
            samples.append(
                SampleRequest(
                    no=request_no,
                    json_data=data,
                    prompt=prompt_text,
                    prompt_len=input_len,
                    history_QA=data["messages"],
                    expected_output_len=output_len,
                    # Signals the request builder to pin max/min tokens.
                    random_flag=True,
                )
            )
        return samples

View File

@@ -39,7 +39,7 @@ from backend_request_func import (
RequestFuncInput,
RequestFuncOutput,
)
from benchmark_dataset import EBChatDataset, EBDataset, SampleRequest
from benchmark_dataset import EBChatDataset, EBDataset, RandomTextDataset, SampleRequest
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
from tqdm.asyncio import tqdm
@@ -337,6 +337,7 @@ async def benchmark(
)
test_history_QA = input_requests[0].history_QA
response_format = input_requests[0].response_format
random_flag = input_requests[0].random_flag
test_input = RequestFuncInput(
model=model_id,
@@ -353,6 +354,7 @@ async def benchmark(
debug=debug,
extra_body=extra_body,
response_format=response_format,
random_flag=random_flag,
)
print("test_input:", test_input)
@@ -385,6 +387,7 @@ async def benchmark(
ignore_eos=ignore_eos,
extra_body=extra_body,
response_format=response_format,
random_flag=random_flag,
)
profile_output = await request_func(request_func_input=profile_input)
if profile_output.success:
@@ -424,6 +427,7 @@ async def benchmark(
)
history_QA = request.history_QA
response_format = request.response_format
random_flag = request.random_flag
req_model_id, req_model_name = model_id, model_name
if lora_modules:
@@ -445,6 +449,7 @@ async def benchmark(
ignore_eos=ignore_eos,
extra_body=extra_body,
response_format=response_format,
random_flag=random_flag,
)
tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar)))
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
@@ -461,6 +466,7 @@ async def benchmark(
output_len=test_output_len,
logprobs=logprobs,
response_format=response_format,
random_flag=random_flag,
)
profile_output = await request_func(request_func_input=profile_input)
if profile_output.success:
@@ -498,6 +504,12 @@ async def benchmark(
benchmark_duration = time.perf_counter() - benchmark_start_time
print(f"benchmark_duration: {benchmark_duration}")
if random_flag:
print("指定随机输入输出长度测试")
print(f"random_input_len: {args.random_input_len}")
print(f"random_output_len: {args.random_output_len}")
print(f"random_range_ratio: {args.random_range_ratio}")
metrics, actual_output_lens = calculate_metrics(
# input_requests=input_requests,
outputs=benchmark_outputs,
@@ -866,6 +878,12 @@ def main(args: argparse.Namespace):
num_requests=args.num_prompts,
output_len=args.sharegpt_output_len,
),
"random": lambda: RandomTextDataset().sample(
num_requests=args.num_prompts,
random_input_len=args.random_input_len,
random_output_len=args.random_output_len,
random_range_ratio=args.random_range_ratio,
),
}
try:
@@ -1021,15 +1039,10 @@ if __name__ == "__main__":
parser.add_argument(
"--dataset-name",
type=str,
default="sharegpt",
default="EBChat",
choices=[
"sharegpt",
"burstgpt",
"sonnet",
"random",
"hf",
"EB",
"EBChat",
"random",
],
help="Name of the dataset to benchmark on.",
)
@@ -1247,37 +1260,24 @@ if __name__ == "__main__":
random_group.add_argument(
"--random-input-len",
type=int,
default=1024,
help="Number of input tokens per request, used only for random sampling.",
default=200,
help="Number of input English words per request, used only for random-text dataset.",
)
random_group.add_argument(
"--random-output-len",
type=int,
default=128,
help="Number of output tokens per request, used only for random sampling.",
default=1024,
help="Number of output tokens per request, used both for random and random-text datasets.",
)
random_group.add_argument(
"--random-range-ratio",
type=float,
default=0.0,
default=0.1,
help="Range ratio for sampling input/output length, "
"used only for random sampling. Must be in the range [0, 1) to define "
"a symmetric sampling range"
"[length * (1 - range_ratio), length * (1 + range_ratio)].",
)
random_group.add_argument(
"--random-prefix-len",
type=int,
default=0,
help=(
"Number of fixed prefix tokens before the random context "
"in a request. "
"The total input length is the sum of `random-prefix-len` and "
"a random "
"context length sampled from [input_len * (1 - range_ratio), "
"input_len * (1 + range_ratio)]."
),
)
hf_group = parser.add_argument_group("hf dataset options")
hf_group.add_argument("--hf-subset", type=str, default=None, help="Subset of the HF dataset.")