This commit is contained in:
Zhang Yulong
2025-11-28 18:29:16 +08:00
committed by GitHub
parent a535050b11
commit 5b49142988
4 changed files with 561 additions and 29 deletions

View File

@@ -58,7 +58,7 @@ python benchmark_serving.py \
--port 9812 \
--dataset-name EBChat \
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 1 \
@@ -78,7 +78,7 @@ python benchmark_serving.py \
--port 9812 \
--dataset-name EBChat \
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 2000 \
@@ -100,7 +100,7 @@ python benchmark_serving.py \
--port 9812 \
--dataset-name EBChat \
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 2000 \
@@ -135,3 +135,30 @@ python benchmarks/benchmark_mtp.py \
--dataset-name:指定数据集类,指定为"EBChat"时可读取转存的FD格式数据集
--dataset-path:测试数据集路径
```
### 指定输入输出长度,构造随机纯文输入测试
相关参数:
- --dataset-name:指定数据集类,指定为"random"时可构造随机纯文输入
- --random-input-len:随机输入长度,对应英文单词数,默认200
- --random-output-len:随机输出长度,默认1024
- --random-range-ratio:输入输出长度变化范围比 [length * (1 - range_ratio), length * (1 + range_ratio)],默认0.1
#### 使用方式:
```bash
python benchmark_serving.py \
--backend openai-chat \
--model EB45T \
--endpoint /v1/chat/completions \
--host 0.0.0.0 \
--port 9812 \
--dataset-name random \
--random-input-len 200 \
--random-output-len 1024 \
--random-range-ratio 0.1 \
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
--num-prompts 2000 \
--max-concurrency 100 \
--save-result > infer_log.txt 2>&1 &
```

View File

@@ -52,6 +52,7 @@ class RequestFuncInput:
language: Optional[str] = None
debug: bool = False
response_format: Optional[dict] = None
random_flag: bool = False
@dataclass
@@ -103,6 +104,13 @@ async def async_request_eb_openai_chat_completions(
# 超参由yaml传入
payload.update(request_func_input.hyper_parameters)
# 随机输入开关
if request_func_input.random_flag:
payload["max_tokens"] = request_func_input.output_len
metadata = payload.get("metadata", {})
metadata["min_tokens"] = request_func_input.output_len
payload["metadata"] = metadata
if request_func_input.ignore_eos:
payload["ignore_eos"] = request_func_input.ignore_eos

View File

@@ -46,6 +46,7 @@ class SampleRequest:
prompt_len: int
expected_output_len: int
response_format: Optional[dict] = None
random_flag: bool = False
class BenchmarkDataset(ABC):
@@ -318,3 +319,499 @@ class EBChatDataset(BenchmarkDataset):
self.maybe_oversample_requests(samples, num_requests)
return samples
class RandomTextDataset(BenchmarkDataset):
    """
    Generates prompts of random English words for pure-text benchmarking.

    Each sampled request is a single-turn chat message whose content is
    ``random_input_len`` draws from :attr:`COMMON_WORDS` joined by spaces.
    Input/output lengths are jittered uniformly within
    ``[length * (1 - ratio), length * (1 + ratio)]``.
    """

    # Common English words vocabulary.
    # Wrapped in ``list(dict.fromkeys(...))`` to drop accidental duplicates
    # ("saw", "thought", "heard", "divided at", ...) while preserving
    # first-seen order — duplicates would silently bias random.choice().
    # NOTE(review): some entries are multi-word phrases ("went back",
    # "woke up at"), so a prompt built from N draws may contain slightly
    # more than N whitespace-separated words; prompt_len counts draws.
    COMMON_WORDS = list(
        dict.fromkeys(
            [
                "the", "be", "to", "of", "and", "a", "in", "that",
                "have", "i", "it", "for", "not", "on", "with", "he",
                "as", "you", "do", "at", "this", "but", "his", "by",
                "from", "they", "we", "say", "her", "she", "or", "an",
                "will", "my", "one", "all", "would", "there", "their", "what",
                "so", "up", "out", "if", "about", "who", "get", "which",
                "go", "me", "when", "make", "can", "like", "time", "no",
                "just", "him", "know", "take", "people", "into", "year", "your",
                "good", "some", "could", "them", "see", "other", "than", "then",
                "now", "look", "only", "come", "its", "over", "think", "also",
                "back", "after", "use", "two", "how", "our", "work", "first",
                "well", "way", "even", "new", "want", "because", "any", "these",
                "give", "day", "most", "us", "is", "are", "was", "were",
                "been", "has", "had", "did", "done", "said", "told", "asked",
                "thought", "went", "saw", "looked", "found", "took", "gave", "made",
                "put", "set", "got", "ran", "came", "walked", "stood", "sat",
                "lay", "felt", "heard", "saw", "knew", "thought", "understood", "believed",
                "wanted", "needed", "liked", "loved", "hated", "feared", "hoped", "expected",
                "planned", "decided", "agreed", "disagreed", "argued", "discussed", "explained", "described",
                "reported", "announced", "declared", "stated", "claimed", "suggested", "proposed", "recommended",
                "advised", "warned", "threatened", "promised", "offered", "refused", "denied", "admitted",
                "confessed", "apologized", "forgave", "thanked", "congratulated", "celebrated", "welcomed", "greeted",
                "introduced", "presented", "showed", "demonstrated", "proved", "tested", "examined", "studied",
                "learned", "taught", "trained", "practiced", "performed", "played", "worked", "built",
                "created", "designed", "developed", "improved", "changed", "fixed", "solved", "completed",
                "finished", "started", "began", "continued", "stopped", "ended", "left", "arrived",
                "departed", "traveled", "moved", "stayed", "waited", "rested", "slept", "woke",
                "ate", "drank", "cooked", "cleaned", "washed", "dressed", "undressed", "showered",
                "bathed", "brushed", "combed", "shaved", "cut", "trimmed", "painted", "drew",
                "wrote", "read", "spoke", "listened", "heard", "saw", "watched", "looked",
                "observed", "noticed", "recognized", "remembered", "forgot", "learned", "understood", "knew",
                "believed", "doubted", "wondered", "thought", "considered", "decided", "chose", "selected",
                "preferred", "liked", "loved", "hated", "feared", "worried", "hoped", "expected",
                "planned", "prepared", "organized", "arranged", "scheduled", "timed", "measured", "counted",
                "calculated", "estimated", "valued", "priced", "cost", "paid", "bought", "sold",
                "traded", "exchanged", "shared", "divided", "combined", "joined", "connected", "attached",
                "separated", "divided", "cut", "broke", "fixed", "repaired", "built", "created",
                "made", "produced", "manufactured", "assembled", "constructed", "designed", "planned", "developed",
                "improved", "enhanced", "changed", "modified", "adjusted", "adapted", "converted", "transformed",
                "turned", "became", "grew", "developed", "evolved", "progressed", "advanced", "moved",
                "went", "came", "arrived", "departed", "left", "returned",
                "went back", "came back", "arrived back", "departed again", "left again", "returned again",
                "went away", "came close", "moved away", "approached", "reached",
                "arrived at", "departed from", "left from", "returned to", "went to", "came from",
                "traveled to", "traveled from", "moved to", "moved from", "stayed at", "remained at",
                "waited for", "rested at", "slept at", "woke up at", "ate at", "drank at",
                "cooked at", "cleaned at", "washed at", "dressed at", "undressed at", "showered at",
                "bathed at", "brushed at", "combed at", "shaved at", "cut at", "trimmed at",
                "painted at", "drew at", "wrote at", "read at", "spoke at", "listened at",
                "heard at", "saw at", "watched at", "looked at", "observed at", "noticed at",
                "recognized at", "remembered at", "forgot at", "learned at", "understood at", "knew at",
                "believed at", "doubted at", "wondered at", "thought at", "considered at", "decided at",
                "chose at", "selected at", "preferred at", "liked at", "loved at", "hated at",
                "feared at", "worried at", "hoped at", "expected at", "planned at", "prepared at",
                "organized at", "arranged at", "scheduled at", "timed at", "measured at", "counted at",
                "calculated at", "estimated at", "valued at", "priced at", "cost at", "paid at",
                "bought at", "sold at", "traded at", "exchanged at", "shared at", "divided at",
                "combined at", "joined at", "connected at", "attached at", "separated at", "divided at",
                "cut at", "broke at", "fixed at", "repaired at", "built at", "created at",
                "made at", "produced at", "manufactured at",
            ]
        )
    )

    def __init__(self, **kwargs):
        """Forward all keyword arguments to :class:`BenchmarkDataset`."""
        super().__init__(**kwargs)

    def sample(
        self,
        num_requests: int,
        lora_path: Optional[str] = None,
        max_loras: Optional[int] = None,
        random_input_len: Optional[int] = None,
        random_output_len: Optional[int] = None,
        random_range_ratio: Optional[float] = None,
        enable_multimodal_chat: bool = False,
        **kwargs,
    ) -> list:
        """
        Build ``num_requests`` random-text requests.

        Args:
            num_requests: Number of requests to generate.
            lora_path / max_loras / enable_multimodal_chat: Unused; kept for
                interface compatibility with sibling dataset classes.
            random_input_len: Base number of vocabulary draws per prompt.
                Required — raises ValueError when missing.
            random_output_len: Base expected output length; may be None, in
                which case ``expected_output_len`` is left as None.
            random_range_ratio: Symmetric jitter ratio in [0, 1); None or
                <= 0 disables jitter.

        Returns:
            list[SampleRequest] with ``random_flag=True`` on every entry.

        Raises:
            ValueError: If ``random_input_len`` is None.
        """
        if random_input_len is None:
            # The previous implementation crashed later with an opaque
            # ``TypeError: 'NoneType' object cannot be interpreted as an
            # integer`` from range(); fail fast with a clear message instead.
            raise ValueError("random_input_len is required for the random-text dataset")

        def _jitter(base_len: Optional[int], ratio: Optional[float]) -> Optional[int]:
            # Uniformly sample within [base*(1-ratio), base*(1+ratio)],
            # clamped to at least 1 word. None/non-positive ratio → no jitter.
            if base_len is None:
                return None
            if ratio is None or ratio <= 0:
                return base_len
            lo = max(1, int(base_len * (1 - ratio)))
            hi = int(base_len * (1 + ratio))
            return random.randint(lo, hi)

        samples = []
        for request_no in range(1, num_requests + 1):
            input_len = _jitter(random_input_len, random_range_ratio)
            output_len = _jitter(random_output_len, random_range_ratio)
            prompt_text = " ".join(
                random.choice(self.COMMON_WORDS) for _ in range(input_len)
            )
            data = {
                "messages": [{"role": "user", "content": prompt_text}],
            }
            samples.append(
                SampleRequest(
                    no=request_no,
                    json_data=data,
                    prompt=prompt_text,
                    prompt_len=input_len,
                    history_QA=data["messages"],
                    expected_output_len=output_len,
                    # Signals the request builder to pin max/min tokens.
                    random_flag=True,
                )
            )
        return samples

View File

@@ -39,7 +39,7 @@ from backend_request_func import (
RequestFuncInput,
RequestFuncOutput,
)
from benchmark_dataset import EBChatDataset, EBDataset, SampleRequest
from benchmark_dataset import EBChatDataset, EBDataset, RandomTextDataset, SampleRequest
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
from tqdm.asyncio import tqdm
@@ -337,6 +337,7 @@ async def benchmark(
)
test_history_QA = input_requests[0].history_QA
response_format = input_requests[0].response_format
random_flag = input_requests[0].random_flag
test_input = RequestFuncInput(
model=model_id,
@@ -353,6 +354,7 @@ async def benchmark(
debug=debug,
extra_body=extra_body,
response_format=response_format,
random_flag=random_flag,
)
print("test_input:", test_input)
@@ -385,6 +387,7 @@ async def benchmark(
ignore_eos=ignore_eos,
extra_body=extra_body,
response_format=response_format,
random_flag=random_flag,
)
profile_output = await request_func(request_func_input=profile_input)
if profile_output.success:
@@ -424,6 +427,7 @@ async def benchmark(
)
history_QA = request.history_QA
response_format = request.response_format
random_flag = request.random_flag
req_model_id, req_model_name = model_id, model_name
if lora_modules:
@@ -445,6 +449,7 @@ async def benchmark(
ignore_eos=ignore_eos,
extra_body=extra_body,
response_format=response_format,
random_flag=random_flag,
)
tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar)))
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
@@ -461,6 +466,7 @@ async def benchmark(
output_len=test_output_len,
logprobs=logprobs,
response_format=response_format,
random_flag=random_flag,
)
profile_output = await request_func(request_func_input=profile_input)
if profile_output.success:
@@ -498,6 +504,12 @@ async def benchmark(
benchmark_duration = time.perf_counter() - benchmark_start_time
print(f"benchmark_duration: {benchmark_duration}")
if random_flag:
print("指定随机输入输出长度测试")
print(f"random_input_len: {args.random_input_len}")
print(f"random_output_len: {args.random_output_len}")
print(f"random_range_ratio: {args.random_range_ratio}")
metrics, actual_output_lens = calculate_metrics(
# input_requests=input_requests,
outputs=benchmark_outputs,
@@ -866,6 +878,12 @@ def main(args: argparse.Namespace):
num_requests=args.num_prompts,
output_len=args.sharegpt_output_len,
),
"random": lambda: RandomTextDataset().sample(
num_requests=args.num_prompts,
random_input_len=args.random_input_len,
random_output_len=args.random_output_len,
random_range_ratio=args.random_range_ratio,
),
}
try:
@@ -1021,15 +1039,10 @@ if __name__ == "__main__":
parser.add_argument(
"--dataset-name",
type=str,
default="sharegpt",
default="EBChat",
choices=[
"sharegpt",
"burstgpt",
"sonnet",
"random",
"hf",
"EB",
"EBChat",
"random",
],
help="Name of the dataset to benchmark on.",
)
@@ -1247,37 +1260,24 @@ if __name__ == "__main__":
random_group.add_argument(
"--random-input-len",
type=int,
default=1024,
help="Number of input tokens per request, used only for random sampling.",
default=200,
help="Number of input English words per request, used only for random-text dataset.",
)
random_group.add_argument(
"--random-output-len",
type=int,
default=128,
help="Number of output tokens per request, used only for random sampling.",
default=1024,
help="Number of output tokens per request, used both for random and random-text datasets.",
)
random_group.add_argument(
"--random-range-ratio",
type=float,
default=0.0,
default=0.1,
help="Range ratio for sampling input/output length, "
"used only for random sampling. Must be in the range [0, 1) to define "
"a symmetric sampling range"
"[length * (1 - range_ratio), length * (1 + range_ratio)].",
)
random_group.add_argument(
"--random-prefix-len",
type=int,
default=0,
help=(
"Number of fixed prefix tokens before the random context "
"in a request. "
"The total input length is the sum of `random-prefix-len` and "
"a random "
"context length sampled from [input_len * (1 - range_ratio), "
"input_len * (1 + range_ratio)]."
),
)
hf_group = parser.add_argument_group("hf dataset options")
hf_group.add_argument("--hf-subset", type=str, default=None, help="Subset of the HF dataset.")