mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
update (#5298)
This commit is contained in:
@@ -58,7 +58,7 @@ python benchmark_serving.py \
|
||||
--port 9812 \
|
||||
--dataset-name EBChat \
|
||||
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
|
||||
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
|
||||
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
|
||||
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
|
||||
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
|
||||
--num-prompts 1 \
|
||||
@@ -78,7 +78,7 @@ python benchmark_serving.py \
|
||||
--port 9812 \
|
||||
--dataset-name EBChat \
|
||||
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
|
||||
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
|
||||
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
|
||||
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
|
||||
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
|
||||
--num-prompts 2000 \
|
||||
@@ -100,7 +100,7 @@ python benchmark_serving.py \
|
||||
--port 9812 \
|
||||
--dataset-name EBChat \
|
||||
--dataset-path ./filtered_sharedgpt_2000_input_1136_output_200_fd.json \
|
||||
--hyperparameter-path yaml/request_yaml/eb45t-32k.yaml \
|
||||
--hyperparameter-path yaml/request_yaml/eb45-32k.yaml \
|
||||
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
|
||||
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
|
||||
--num-prompts 2000 \
|
||||
@@ -135,3 +135,30 @@ python benchmarks/benchmark_mtp.py \
|
||||
--dataset-name:指定数据集类,指定为"EBChat"可读取转存的FD格式数据集
|
||||
--dataset-path:测试数据集路径
|
||||
```
|
||||
|
||||
### 指定输入输出长度,构造随机纯文输入测试
|
||||
|
||||
相关参数:
|
||||
- --dataset-name:指定数据集类,指定为"random"可构造随机纯文输入
|
||||
- --random-input-len:随机输入长度,对应英文单词数,默认200
|
||||
- --random-output-len:随机输出长度,默认1024
|
||||
- --random-range-ratio:输入输出长度变化范围比,[length * (1 - range_ratio), length * (1 + range_ratio)],默认0.1
|
||||
|
||||
#### 使用方式:
|
||||
```bash
|
||||
python benchmark_serving.py \
|
||||
--backend openai-chat \
|
||||
--model EB45T \
|
||||
--endpoint /v1/chat/completions \
|
||||
--host 0.0.0.0 \
|
||||
--port 9812 \
|
||||
--dataset-name random \
|
||||
--random-input-len 200 \
|
||||
--random-output-len 1024 \
|
||||
--random-range-ratio 0.1 \
|
||||
--percentile-metrics ttft,tpot,itl,e2el,s_ttft,s_itl,s_e2el,s_decode,input_len,s_input_len,output_len \
|
||||
--metric-percentiles 80,95,99,99.9,99.95,99.99 \
|
||||
--num-prompts 2000 \
|
||||
--max-concurrency 100 \
|
||||
--save-result > infer_log.txt 2>&1 &
|
||||
```
|
||||
|
||||
@@ -52,6 +52,7 @@ class RequestFuncInput:
|
||||
language: Optional[str] = None
|
||||
debug: bool = False
|
||||
response_format: Optional[dict] = None
|
||||
random_flag: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -103,6 +104,13 @@ async def async_request_eb_openai_chat_completions(
|
||||
# 超参由yaml传入
|
||||
payload.update(request_func_input.hyper_parameters)
|
||||
|
||||
# 随机输入开关
|
||||
if request_func_input.random_flag:
|
||||
payload["max_tokens"] = request_func_input.output_len
|
||||
metadata = payload.get("metadata", {})
|
||||
metadata["min_tokens"] = request_func_input.output_len
|
||||
payload["metadata"] = metadata
|
||||
|
||||
if request_func_input.ignore_eos:
|
||||
payload["ignore_eos"] = request_func_input.ignore_eos
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ class SampleRequest:
|
||||
prompt_len: int
|
||||
expected_output_len: int
|
||||
response_format: Optional[dict] = None
|
||||
random_flag: bool = False
|
||||
|
||||
|
||||
class BenchmarkDataset(ABC):
|
||||
@@ -318,3 +319,499 @@ class EBChatDataset(BenchmarkDataset):
|
||||
|
||||
self.maybe_oversample_requests(samples, num_requests)
|
||||
return samples
|
||||
|
||||
|
||||
class RandomTextDataset(BenchmarkDataset):
    """
    Synthetic text-only dataset whose prompts are random English words.

    Each generated request is a single-turn chat message made of
    ``prompt_len`` space-separated words drawn uniformly from the distinct
    single words found in ``COMMON_WORDS``. Requests are flagged with
    ``random_flag=True`` so downstream code can tell them apart from
    file-backed samples.
    """

    # Raw word/phrase table. Some entries are multi-word phrases and several
    # are repeated; `_vocabulary()` normalises it before sampling, so the table
    # itself is kept as-is for anything else that may read it.
    # fmt: off
    COMMON_WORDS = [
        "the", "be", "to", "of", "and", "a", "in", "that", "have", "i",
        "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
        "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
        "or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
        "so", "up", "out", "if", "about", "who", "get", "which", "go", "me",
        "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
        "people", "into", "year", "your", "good", "some", "could", "them", "see", "other",
        "than", "then", "now", "look", "only", "come", "its", "over", "think", "also",
        "back", "after", "use", "two", "how", "our", "work", "first", "well", "way",
        "even", "new", "want", "because", "any", "these", "give", "day", "most", "us",
        "is", "are", "was", "were", "been", "has", "had", "did", "done", "said",
        "told", "asked", "thought", "went", "saw", "looked", "found", "took", "gave", "made",
        "put", "set", "got", "ran", "came", "walked", "stood", "sat", "lay", "felt",
        "heard", "saw", "knew", "thought", "understood", "believed", "wanted", "needed", "liked", "loved",
        "hated", "feared", "hoped", "expected", "planned", "decided", "agreed", "disagreed", "argued", "discussed",
        "explained", "described", "reported", "announced", "declared", "stated", "claimed", "suggested", "proposed",
        "recommended", "advised", "warned", "threatened", "promised", "offered", "refused", "denied", "admitted",
        "confessed", "apologized", "forgave", "thanked", "congratulated", "celebrated", "welcomed", "greeted",
        "introduced", "presented", "showed", "demonstrated", "proved", "tested", "examined", "studied", "learned",
        "taught", "trained", "practiced", "performed", "played", "worked", "built", "created", "designed",
        "developed", "improved", "changed", "fixed", "solved", "completed", "finished", "started", "began",
        "continued", "stopped", "ended", "left", "arrived", "departed", "traveled", "moved", "stayed", "waited",
        "rested", "slept", "woke", "ate", "drank", "cooked", "cleaned", "washed", "dressed", "undressed",
        "showered", "bathed", "brushed", "combed", "shaved", "cut", "trimmed", "painted", "drew", "wrote",
        "read", "spoke", "listened", "heard", "saw", "watched", "looked", "observed", "noticed", "recognized",
        "remembered", "forgot", "learned", "understood", "knew", "believed", "doubted", "wondered", "thought",
        "considered", "decided", "chose", "selected", "preferred", "liked", "loved", "hated", "feared", "worried",
        "hoped", "expected", "planned", "prepared", "organized", "arranged", "scheduled", "timed", "measured",
        "counted", "calculated", "estimated", "valued", "priced", "cost", "paid", "bought", "sold", "traded",
        "exchanged", "shared", "divided", "combined", "joined", "connected", "attached", "separated", "divided",
        "cut", "broke", "fixed", "repaired", "built", "created", "made", "produced", "manufactured", "assembled",
        "constructed", "designed", "planned", "developed", "improved", "enhanced", "changed", "modified", "adjusted",
        "adapted", "converted", "transformed", "turned", "became", "grew", "developed", "evolved", "progressed",
        "advanced", "moved", "went", "came", "arrived", "departed", "left", "returned",
        "went back", "came back", "arrived back", "departed again", "left again", "returned again",
        "went away", "came close", "moved away", "approached", "reached",
        "arrived at", "departed from", "left from", "returned to", "went to", "came from",
        "traveled to", "traveled from", "moved to", "moved from", "stayed at", "remained at",
        "waited for", "rested at", "slept at", "woke up at", "ate at", "drank at",
        "cooked at", "cleaned at", "washed at", "dressed at", "undressed at", "showered at",
        "bathed at", "brushed at", "combed at", "shaved at", "cut at", "trimmed at",
        "painted at", "drew at", "wrote at", "read at", "spoke at", "listened at",
        "heard at", "saw at", "watched at", "looked at", "observed at", "noticed at",
        "recognized at", "remembered at", "forgot at", "learned at", "understood at", "knew at",
        "believed at", "doubted at", "wondered at", "thought at", "considered at", "decided at",
        "chose at", "selected at", "preferred at", "liked at", "loved at", "hated at",
        "feared at", "worried at", "hoped at", "expected at", "planned at", "prepared at",
        "organized at", "arranged at", "scheduled at", "timed at", "measured at", "counted at",
        "calculated at", "estimated at", "valued at", "priced at", "cost at", "paid at",
        "bought at", "sold at", "traded at", "exchanged at", "shared at", "divided at",
        "combined at", "joined at", "connected at", "attached at", "separated at", "divided at",
        "cut at", "broke at", "fixed at", "repaired at", "built at", "created at",
        "made at", "produced at", "manufactured at",
    ]
    # fmt: on

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @classmethod
    def _vocabulary(cls) -> list:
        """Return the distinct single words of ``COMMON_WORDS`` in first-seen order.

        Phrases are split on whitespace so that every sampled item is exactly
        one word (keeping ``prompt_len`` equal to the real word count), and
        duplicates are dropped so each word is equally likely.
        """
        return list(dict.fromkeys(word for entry in cls.COMMON_WORDS for word in entry.split()))

    @staticmethod
    def _sample_len(base_len: Optional[int], ratio: Optional[float]) -> Optional[int]:
        """Draw a length from ``[base_len * (1 - ratio), base_len * (1 + ratio)]``.

        Returns ``base_len`` unchanged when ``ratio`` is ``None`` or not
        positive, and ``None`` when ``base_len`` is ``None``. The lower bound
        is clamped to 1 and the upper bound to the lower bound, so the range
        handed to ``random.randint`` is never empty.
        """
        if base_len is None:
            return None
        if ratio is None or ratio <= 0:
            return base_len
        lo = max(1, int(base_len * (1 - ratio)))
        hi = max(lo, int(base_len * (1 + ratio)))
        return random.randint(lo, hi)

    def sample(
        self,
        num_requests: int,
        lora_path: Optional[str] = None,
        max_loras: Optional[int] = None,
        random_input_len: Optional[int] = None,
        random_output_len: Optional[int] = None,
        random_range_ratio: Optional[float] = None,
        enable_multimodal_chat: bool = False,
        **kwargs,
    ) -> list:
        """Build ``num_requests`` random-word chat requests.

        Args:
            num_requests: Number of requests to generate; they are numbered
                from 1 via the ``no`` field.
            lora_path: Unused by this dataset.
            max_loras: Unused by this dataset.
            random_input_len: Base prompt length in words. Required.
            random_output_len: Base expected output length. ``None`` is passed
                through unchanged as ``expected_output_len``.
            random_range_ratio: Relative jitter applied independently to the
                input and output length of every request.
            enable_multimodal_chat: Unused by this dataset.
            **kwargs: Ignored.

        Returns:
            A list of ``SampleRequest`` objects with ``random_flag=True``.

        Raises:
            ValueError: If ``random_input_len`` is ``None`` or negative.
        """
        # Fail early with a clear message instead of the opaque TypeError that
        # range(None) would raise further down.
        if random_input_len is None or random_input_len < 0:
            raise ValueError(
                f"random_input_len must be a non-negative integer for RandomTextDataset, got {random_input_len!r}"
            )

        vocabulary = self._vocabulary()
        samples = []

        for i in range(1, num_requests + 1):
            # [length * (1 - range_ratio), length * (1 + range_ratio)]
            sampled_input_len = self._sample_len(random_input_len, random_range_ratio)
            sampled_output_len = self._sample_len(random_output_len, random_range_ratio)

            prompt_text = " ".join(random.choices(vocabulary, k=sampled_input_len))
            data = {
                "messages": [{"role": "user", "content": prompt_text}],
            }

            samples.append(
                SampleRequest(
                    no=i,
                    json_data=data,
                    prompt=prompt_text,
                    prompt_len=sampled_input_len,
                    history_QA=data["messages"],
                    expected_output_len=sampled_output_len,
                    random_flag=True,
                )
            )
        return samples
|
||||
|
||||
@@ -39,7 +39,7 @@ from backend_request_func import (
|
||||
RequestFuncInput,
|
||||
RequestFuncOutput,
|
||||
)
|
||||
from benchmark_dataset import EBChatDataset, EBDataset, SampleRequest
|
||||
from benchmark_dataset import EBChatDataset, EBDataset, RandomTextDataset, SampleRequest
|
||||
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
|
||||
from tqdm.asyncio import tqdm
|
||||
|
||||
@@ -337,6 +337,7 @@ async def benchmark(
|
||||
)
|
||||
test_history_QA = input_requests[0].history_QA
|
||||
response_format = input_requests[0].response_format
|
||||
random_flag = input_requests[0].random_flag
|
||||
|
||||
test_input = RequestFuncInput(
|
||||
model=model_id,
|
||||
@@ -353,6 +354,7 @@ async def benchmark(
|
||||
debug=debug,
|
||||
extra_body=extra_body,
|
||||
response_format=response_format,
|
||||
random_flag=random_flag,
|
||||
)
|
||||
|
||||
print("test_input:", test_input)
|
||||
@@ -385,6 +387,7 @@ async def benchmark(
|
||||
ignore_eos=ignore_eos,
|
||||
extra_body=extra_body,
|
||||
response_format=response_format,
|
||||
random_flag=random_flag,
|
||||
)
|
||||
profile_output = await request_func(request_func_input=profile_input)
|
||||
if profile_output.success:
|
||||
@@ -424,6 +427,7 @@ async def benchmark(
|
||||
)
|
||||
history_QA = request.history_QA
|
||||
response_format = request.response_format
|
||||
random_flag = request.random_flag
|
||||
|
||||
req_model_id, req_model_name = model_id, model_name
|
||||
if lora_modules:
|
||||
@@ -445,6 +449,7 @@ async def benchmark(
|
||||
ignore_eos=ignore_eos,
|
||||
extra_body=extra_body,
|
||||
response_format=response_format,
|
||||
random_flag=random_flag,
|
||||
)
|
||||
tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar)))
|
||||
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
|
||||
@@ -461,6 +466,7 @@ async def benchmark(
|
||||
output_len=test_output_len,
|
||||
logprobs=logprobs,
|
||||
response_format=response_format,
|
||||
random_flag=random_flag,
|
||||
)
|
||||
profile_output = await request_func(request_func_input=profile_input)
|
||||
if profile_output.success:
|
||||
@@ -498,6 +504,12 @@ async def benchmark(
|
||||
benchmark_duration = time.perf_counter() - benchmark_start_time
|
||||
print(f"benchmark_duration: {benchmark_duration} 秒")
|
||||
|
||||
if random_flag:
|
||||
print("指定随机输入输出长度测试")
|
||||
print(f"random_input_len: {args.random_input_len}")
|
||||
print(f"random_output_len: {args.random_output_len}")
|
||||
print(f"random_range_ratio: {args.random_range_ratio}")
|
||||
|
||||
metrics, actual_output_lens = calculate_metrics(
|
||||
# input_requests=input_requests,
|
||||
outputs=benchmark_outputs,
|
||||
@@ -866,6 +878,12 @@ def main(args: argparse.Namespace):
|
||||
num_requests=args.num_prompts,
|
||||
output_len=args.sharegpt_output_len,
|
||||
),
|
||||
"random": lambda: RandomTextDataset().sample(
|
||||
num_requests=args.num_prompts,
|
||||
random_input_len=args.random_input_len,
|
||||
random_output_len=args.random_output_len,
|
||||
random_range_ratio=args.random_range_ratio,
|
||||
),
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -1021,15 +1039,10 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--dataset-name",
|
||||
type=str,
|
||||
default="sharegpt",
|
||||
default="EBChat",
|
||||
choices=[
|
||||
"sharegpt",
|
||||
"burstgpt",
|
||||
"sonnet",
|
||||
"random",
|
||||
"hf",
|
||||
"EB",
|
||||
"EBChat",
|
||||
"random",
|
||||
],
|
||||
help="Name of the dataset to benchmark on.",
|
||||
)
|
||||
@@ -1247,37 +1260,24 @@ if __name__ == "__main__":
|
||||
random_group.add_argument(
|
||||
"--random-input-len",
|
||||
type=int,
|
||||
default=1024,
|
||||
help="Number of input tokens per request, used only for random sampling.",
|
||||
default=200,
|
||||
help="Number of input English words per request, used only for random-text dataset.",
|
||||
)
|
||||
random_group.add_argument(
|
||||
"--random-output-len",
|
||||
type=int,
|
||||
default=128,
|
||||
help="Number of output tokens per request, used only for random sampling.",
|
||||
default=1024,
|
||||
help="Number of output tokens per request, used both for random and random-text datasets.",
|
||||
)
|
||||
random_group.add_argument(
|
||||
"--random-range-ratio",
|
||||
type=float,
|
||||
default=0.0,
|
||||
default=0.1,
|
||||
help="Range ratio for sampling input/output length, "
|
||||
"used only for random sampling. Must be in the range [0, 1) to define "
|
||||
"a symmetric sampling range"
|
||||
"[length * (1 - range_ratio), length * (1 + range_ratio)].",
|
||||
)
|
||||
random_group.add_argument(
|
||||
"--random-prefix-len",
|
||||
type=int,
|
||||
default=0,
|
||||
help=(
|
||||
"Number of fixed prefix tokens before the random context "
|
||||
"in a request. "
|
||||
"The total input length is the sum of `random-prefix-len` and "
|
||||
"a random "
|
||||
"context length sampled from [input_len * (1 - range_ratio), "
|
||||
"input_len * (1 + range_ratio)]."
|
||||
),
|
||||
)
|
||||
|
||||
hf_group = parser.add_argument_group("hf dataset options")
|
||||
hf_group.add_argument("--hf-subset", type=str, default=None, help="Subset of the HF dataset.")
|
||||
|
||||
Reference in New Issue
Block a user