From 5b491429885f5f5d2e6635cd97bca22323d23e4b Mon Sep 17 00:00:00 2001 From: Zhang Yulong <35552275+ZhangYulongg@users.noreply.github.com> Date: Fri, 28 Nov 2025 18:29:16 +0800 Subject: [PATCH] update (#5298) --- benchmarks/README.md | 33 +- benchmarks/backend_request_func.py | 8 + benchmarks/benchmark_dataset.py | 497 +++++++++++++++++++++++++++++ benchmarks/benchmark_serving.py | 52 +-- 4 files changed, 561 insertions(+), 29 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index bac077ffd..8cd9b9fce 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -58,7 +58,7 @@ python benchmark_serving.py \ --port 9812 \ --dataset-name EBChat \ --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \ - --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \ + --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \ --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ --metric-percentiles 80,95,99,99.9,99.95,99.99 \ --num-prompts 1 \ @@ -78,7 +78,7 @@ python benchmark_serving.py \ --port 9812 \ --dataset-name EBChat \ --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \ - --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \ + --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \ --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ --metric-percentiles 80,95,99,99.9,99.95,99.99 \ --num-prompts 2000 \ @@ -100,7 +100,7 @@ python benchmark_serving.py \ --port 9812 \ --dataset-name EBChat \ --dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \ - --hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \ + --hyperparameter-path yaml/request_yaml/eb45-32k.yaml \ --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ --metric-percentiles 80,95,99,99.9,99.95,99.99 \ --num-prompts 2000 \ @@ -135,3 +135,30 @@ python benchmarks/benchmark_mtp.py \ 
--dataset-name:指定数据集类,指定为"EBChat"可读取转存的FD格式数据集 --dataset-path:测试数据集路径 ``` + +### 指定输入输出长度,构造随机纯文输入测试 + +相关参数: +- --dataset-name:指定数据集类,指定为"random"可构造随机纯文输入 +- --random-input-len:随机输入长度,对应英文单词数,默认200 +- --random-output-len:随机输出长度,默认1024 +- --random-range-ratio:输入输出长度变化范围比,[length * (1 - range_ratio), length * (1 + range_ratio)],默认0.1 + +#### 使用方式: +```bash +python benchmark_serving.py \ + --backend openai-chat \ + --model EB45T \ + --endpoint /v1/chat/completions \ + --host 0.0.0.0 \ + --port 9812 \ + --dataset-name random \ + --random-input-len 200 \ + --random-output-len 1024 \ + --random-range-ratio 0.1 \ + --percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \ + --metric-percentiles 80,95,99,99.9,99.95,99.99 \ + --num-prompts 2000 \ + --max-concurrency 100 \ + --save-result > infer_log.txt 2>&1 & +``` diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 2ccb4e345..6e1988239 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -52,6 +52,7 @@ class RequestFuncInput: language: Optional[str] = None debug: bool = False response_format: Optional[dict] = None + random_flag: bool = False @dataclass @@ -103,6 +104,13 @@ async def async_request_eb_openai_chat_completions( # 超参由yaml传入 payload.update(request_func_input.hyper_parameters) + # 随机输入开关 + if request_func_input.random_flag: + payload["max_tokens"] = request_func_input.output_len + metadata = payload.get("metadata", {}) + metadata["min_tokens"] = request_func_input.output_len + payload["metadata"] = metadata + if request_func_input.ignore_eos: payload["ignore_eos"] = request_func_input.ignore_eos diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index e9552c6d2..8c35867ad 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -46,6 +46,7 @@ class SampleRequest: prompt_len: int expected_output_len: int response_format: Optional[dict] = 
None + random_flag: bool = False class BenchmarkDataset(ABC): @@ -318,3 +319,499 @@ class EBChatDataset(BenchmarkDataset): self.maybe_oversample_requests(samples, num_requests) return samples + + +class RandomTextDataset(BenchmarkDataset): + """ + Generates random English words for pure text benchmarking. + """ + + # Common English words vocabulary + COMMON_WORDS = [ + "the", + "be", + "to", + "of", + "and", + "a", + "in", + "that", + "have", + "i", + "it", + "for", + "not", + "on", + "with", + "he", + "as", + "you", + "do", + "at", + "this", + "but", + "his", + "by", + "from", + "they", + "we", + "say", + "her", + "she", + "or", + "an", + "will", + "my", + "one", + "all", + "would", + "there", + "their", + "what", + "so", + "up", + "out", + "if", + "about", + "who", + "get", + "which", + "go", + "me", + "when", + "make", + "can", + "like", + "time", + "no", + "just", + "him", + "know", + "take", + "people", + "into", + "year", + "your", + "good", + "some", + "could", + "them", + "see", + "other", + "than", + "then", + "now", + "look", + "only", + "come", + "its", + "over", + "think", + "also", + "back", + "after", + "use", + "two", + "how", + "our", + "work", + "first", + "well", + "way", + "even", + "new", + "want", + "because", + "any", + "these", + "give", + "day", + "most", + "us", + "is", + "are", + "was", + "were", + "been", + "has", + "had", + "did", + "done", + "said", + "told", + "asked", + "thought", + "went", + "saw", + "looked", + "found", + "took", + "gave", + "made", + "put", + "set", + "got", + "ran", + "came", + "walked", + "stood", + "sat", + "lay", + "felt", + "heard", + "saw", + "knew", + "thought", + "understood", + "believed", + "wanted", + "needed", + "liked", + "loved", + "hated", + "feared", + "hoped", + "expected", + "planned", + "decided", + "agreed", + "disagreed", + "argued", + "discussed", + "explained", + "described", + "reported", + "announced", + "declared", + "stated", + "claimed", + "suggested", + "proposed", + "recommended", + 
"advised", + "warned", + "threatened", + "promised", + "offered", + "refused", + "denied", + "admitted", + "confessed", + "apologized", + "forgave", + "thanked", + "congratulated", + "celebrated", + "welcomed", + "greeted", + "introduced", + "presented", + "showed", + "demonstrated", + "proved", + "tested", + "examined", + "studied", + "learned", + "taught", + "trained", + "practiced", + "performed", + "played", + "worked", + "built", + "created", + "designed", + "developed", + "improved", + "changed", + "fixed", + "solved", + "completed", + "finished", + "started", + "began", + "continued", + "stopped", + "ended", + "left", + "arrived", + "departed", + "traveled", + "moved", + "stayed", + "waited", + "rested", + "slept", + "woke", + "ate", + "drank", + "cooked", + "cleaned", + "washed", + "dressed", + "undressed", + "showered", + "bathed", + "brushed", + "combed", + "shaved", + "cut", + "trimmed", + "painted", + "drew", + "wrote", + "read", + "spoke", + "listened", + "heard", + "saw", + "watched", + "looked", + "observed", + "noticed", + "recognized", + "remembered", + "forgot", + "learned", + "understood", + "knew", + "believed", + "doubted", + "wondered", + "thought", + "considered", + "decided", + "chose", + "selected", + "preferred", + "liked", + "loved", + "hated", + "feared", + "worried", + "hoped", + "expected", + "planned", + "prepared", + "organized", + "arranged", + "scheduled", + "timed", + "measured", + "counted", + "calculated", + "estimated", + "valued", + "priced", + "cost", + "paid", + "bought", + "sold", + "traded", + "exchanged", + "shared", + "divided", + "combined", + "joined", + "connected", + "attached", + "separated", + "divided", + "cut", + "broke", + "fixed", + "repaired", + "built", + "created", + "made", + "produced", + "manufactured", + "assembled", + "constructed", + "designed", + "planned", + "developed", + "improved", + "enhanced", + "changed", + "modified", + "adjusted", + "adapted", + "converted", + "transformed", + "turned", + 
"became", + "grew", + "developed", + "evolved", + "progressed", + "advanced", + "moved", + "went", + "came", + "arrived", + "departed", + "left", + "returned", + "went back", + "came back", + "arrived back", + "departed again", + "left again", + "returned again", + "went away", + "came close", + "moved away", + "approached", + "reached", + "arrived at", + "departed from", + "left from", + "returned to", + "went to", + "came from", + "traveled to", + "traveled from", + "moved to", + "moved from", + "stayed at", + "remained at", + "waited for", + "rested at", + "slept at", + "woke up at", + "ate at", + "drank at", + "cooked at", + "cleaned at", + "washed at", + "dressed at", + "undressed at", + "showered at", + "bathed at", + "brushed at", + "combed at", + "shaved at", + "cut at", + "trimmed at", + "painted at", + "drew at", + "wrote at", + "read at", + "spoke at", + "listened at", + "heard at", + "saw at", + "watched at", + "looked at", + "observed at", + "noticed at", + "recognized at", + "remembered at", + "forgot at", + "learned at", + "understood at", + "knew at", + "believed at", + "doubted at", + "wondered at", + "thought at", + "considered at", + "decided at", + "chose at", + "selected at", + "preferred at", + "liked at", + "loved at", + "hated at", + "feared at", + "worried at", + "hoped at", + "expected at", + "planned at", + "prepared at", + "organized at", + "arranged at", + "scheduled at", + "timed at", + "measured at", + "counted at", + "calculated at", + "estimated at", + "valued at", + "priced at", + "cost at", + "paid at", + "bought at", + "sold at", + "traded at", + "exchanged at", + "shared at", + "divided at", + "combined at", + "joined at", + "connected at", + "attached at", + "separated at", + "divided at", + "cut at", + "broke at", + "fixed at", + "repaired at", + "built at", + "created at", + "made at", + "produced at", + "manufactured at", + ] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def sample( + self, + 
num_requests: int, + lora_path: Optional[str] = None, + max_loras: Optional[int] = None, + random_input_len: Optional[int] = None, + random_output_len: Optional[int] = None, + random_range_ratio: Optional[float] = None, + enable_multimodal_chat: bool = False, + **kwargs, + ) -> list: + samples = [] + + def sample_len(base_len: int, ratio: float) -> int: + if base_len is None: + return None + if ratio is None or ratio <= 0: + return base_len + lo = max(1, int(base_len * (1 - ratio))) + hi = int(base_len * (1 + ratio)) + return random.randint(lo, hi) + + for i in range(1, num_requests + 1): + # [length * (1 - range_ratio), length * (1 + range_ratio)] + sampled_input_len = sample_len(random_input_len, random_range_ratio) + sampled_output_len = sample_len(random_output_len, random_range_ratio) + + words = [random.choice(self.COMMON_WORDS) for _ in range(sampled_input_len)] + prompt_text = " ".join(words) + + data = { + "messages": [{"role": "user", "content": prompt_text}], + } + + samples.append( + SampleRequest( + no=i, + json_data=data, + prompt=prompt_text, + prompt_len=sampled_input_len, + history_QA=data["messages"], + expected_output_len=sampled_output_len, + random_flag=True, + ) + ) + return samples diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index ca721e9cb..b9e61ef7a 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -39,7 +39,7 @@ from backend_request_func import ( RequestFuncInput, RequestFuncOutput, ) -from benchmark_dataset import EBChatDataset, EBDataset, SampleRequest +from benchmark_dataset import EBChatDataset, EBDataset, RandomTextDataset, SampleRequest from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json from tqdm.asyncio import tqdm @@ -337,6 +337,7 @@ async def benchmark( ) test_history_QA = input_requests[0].history_QA response_format = input_requests[0].response_format + random_flag = input_requests[0].random_flag test_input = RequestFuncInput( 
model=model_id, @@ -353,6 +354,7 @@ async def benchmark( debug=debug, extra_body=extra_body, response_format=response_format, + random_flag=random_flag, ) print("test_input:", test_input) @@ -385,6 +387,7 @@ async def benchmark( ignore_eos=ignore_eos, extra_body=extra_body, response_format=response_format, + random_flag=random_flag, ) profile_output = await request_func(request_func_input=profile_input) if profile_output.success: @@ -424,6 +427,7 @@ async def benchmark( ) history_QA = request.history_QA response_format = request.response_format + random_flag = request.random_flag req_model_id, req_model_name = model_id, model_name if lora_modules: @@ -445,6 +449,7 @@ async def benchmark( ignore_eos=ignore_eos, extra_body=extra_body, response_format=response_format, + random_flag=random_flag, ) tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar))) outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks) @@ -461,6 +466,7 @@ async def benchmark( output_len=test_output_len, logprobs=logprobs, response_format=response_format, + random_flag=random_flag, ) profile_output = await request_func(request_func_input=profile_input) if profile_output.success: @@ -498,6 +504,12 @@ async def benchmark( benchmark_duration = time.perf_counter() - benchmark_start_time print(f"benchmark_duration: {benchmark_duration} 秒") + if random_flag: + print("指定随机输入输出长度测试") + print(f"random_input_len: {args.random_input_len}") + print(f"random_output_len: {args.random_output_len}") + print(f"random_range_ratio: {args.random_range_ratio}") + metrics, actual_output_lens = calculate_metrics( # input_requests=input_requests, outputs=benchmark_outputs, @@ -866,6 +878,12 @@ def main(args: argparse.Namespace): num_requests=args.num_prompts, output_len=args.sharegpt_output_len, ), + "random": lambda: RandomTextDataset().sample( + num_requests=args.num_prompts, + random_input_len=args.random_input_len, + random_output_len=args.random_output_len, + 
random_range_ratio=args.random_range_ratio, + ), } try: @@ -1021,15 +1039,10 @@ if __name__ == "__main__": parser.add_argument( "--dataset-name", type=str, - default="sharegpt", + default="EBChat", choices=[ - "sharegpt", - "burstgpt", - "sonnet", - "random", - "hf", - "EB", "EBChat", + "random", ], help="Name of the dataset to benchmark on.", ) @@ -1247,37 +1260,24 @@ if __name__ == "__main__": random_group.add_argument( "--random-input-len", type=int, - default=1024, - help="Number of input tokens per request, used only for random sampling.", + default=200, + help="Number of input English words per request, used only for random-text dataset.", ) random_group.add_argument( "--random-output-len", type=int, - default=128, - help="Number of output tokens per request, used only for random sampling.", + default=1024, + help="Number of output tokens per request, used both for random and random-text datasets.", ) random_group.add_argument( "--random-range-ratio", type=float, - default=0.0, + default=0.1, help="Range ratio for sampling input/output length, " "used only for random sampling. Must be in the range [0, 1) to define " "a symmetric sampling range" "[length * (1 - range_ratio), length * (1 + range_ratio)].", ) - random_group.add_argument( - "--random-prefix-len", - type=int, - default=0, - help=( - "Number of fixed prefix tokens before the random context " - "in a request. " - "The total input length is the sum of `random-prefix-len` and " - "a random " - "context length sampled from [input_len * (1 - range_ratio), " - "input_len * (1 + range_ratio)]." - ), - ) hf_group = parser.add_argument_group("hf dataset options") hf_group.add_argument("--hf-subset", type=str, default=None, help="Subset of the HF dataset.")