Update benchmark tools (#3004)

* update benchmark tools

* update benchmark tools
Zhang Yulong
2025-07-24 15:19:23 +08:00
committed by GitHub
parent f935d6f862
commit 5151bc92c8
4 changed files with 41 additions and 7 deletions


@@ -41,7 +41,10 @@ python -m pip install -r requirements.txt
--metric-percentiles 80,95,99,99.9,99.95,99.99: percentiles of the performance metrics reported in the results
--num-prompts 1: total number of requests to send
--max-concurrency 1: number of concurrent requests for the benchmark
--save-result: enable result saving; results are written to a JSON file
--save-result: enable result saving; results are written to a JSON file (default False, not saved)
--debug: enable debug mode, printing each payload and output (default False)
--shuffle: whether to shuffle the dataset (default False, no shuffling)
--seed: random seed used when shuffling the dataset (default 0)
```
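
Putting the new flags together, a shuffled, debug-enabled run might look like the following. This is a hypothetical invocation: the entry script name and the dataset path are placeholders, and only the flags themselves come from the list above.

```bash
# Hypothetical invocation; the script name and dataset path are placeholders.
python benchmark_serving.py \
    --dataset-path ./data/requests.jsonl \
    --num-prompts 100 \
    --max-concurrency 8 \
    --metric-percentiles 80,95,99,99.9,99.95,99.99 \
    --shuffle --seed 42 \
    --debug \
    --save-result
```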
##### Benchmarking the /v1/chat/completions endpoint: single-record debugging


@@ -50,6 +50,7 @@ class RequestFuncInput:
multi_modal_content: Optional[dict] = None
ignore_eos: bool = False
language: Optional[str] = None
debug: bool = False
@dataclass
@@ -98,7 +99,8 @@ async def async_request_eb_openai_chat_completions(
if request_func_input.ignore_eos:
payload["ignore_eos"] = request_func_input.ignore_eos
print(f"payload:{json.dumps(payload, ensure_ascii=False)}")
if request_func_input.debug:
print(f"payload:{json.dumps(payload, ensure_ascii=False)}")
headers = {
"Content-Type": "application/json",
@@ -179,7 +181,8 @@ async def async_request_eb_openai_chat_completions(
f.write(str(output) + "\n")
if pbar:
pbar.update(1)
print("#####final_output:", output)
if request_func_input.debug:
print("#####final_output:", output)
return output
@@ -209,7 +212,8 @@ async def async_request_eb_openai_completions(
if request_func_input.ignore_eos:
payload["ignore_eos"] = request_func_input.ignore_eos
print("payload:", json.dumps(payload, ensure_ascii=False))
if request_func_input.debug:
print("payload:", json.dumps(payload, ensure_ascii=False))
headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
@@ -288,7 +292,8 @@ async def async_request_eb_openai_completions(
exc_info = sys.exc_info()
output.error = "".join(traceback.format_exception(*exc_info))
print(f"final_output:{output}")
if request_func_input.debug:
print(f"final_output:{output}")
if pbar:
pbar.update(1)
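
Taken together, these hunks thread a per-request debug switch into RequestFuncInput and gate every payload/output dump behind it, so benchmark runs stay quiet unless --debug is passed. A minimal, self-contained sketch of the pattern (the field names mirror the diff; the payload shape and helper function are illustrative only):

```python
import json
from dataclasses import dataclass
from typing import Optional


@dataclass
class RequestFuncInput:
    prompt: str
    api_url: str
    ignore_eos: bool = False
    language: Optional[str] = None
    debug: bool = False  # new: controls verbose per-request logging


def build_payload(req: RequestFuncInput) -> dict:
    """Build a chat-completions style payload, dumping it only in debug mode."""
    payload = {"messages": [{"role": "user", "content": req.prompt}]}
    if req.ignore_eos:
        payload["ignore_eos"] = req.ignore_eos
    if req.debug:
        # Printed only when --debug was passed; silent by default.
        print(f"payload:{json.dumps(payload, ensure_ascii=False)}")
    return payload


if __name__ == "__main__":
    req = RequestFuncInput(
        prompt="hello",
        api_url="http://localhost:8000/v1/chat/completions",
        debug=True,
    )
    build_payload(req)
```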


@@ -57,6 +57,7 @@ class BenchmarkDataset(ABC):
self,
dataset_path: Optional[str] = None,
random_seed: int = DEFAULT_SEED,
shuffle: bool = False,
hyperparameter_path: Optional[str] = None,
) -> None:
"""
@@ -72,6 +73,7 @@ class BenchmarkDataset(ABC):
# default seed.
self.random_seed = random_seed if random_seed is not None else self.DEFAULT_SEED
self.data = None
self.shuffle = shuffle
self.hyperparameter_path = hyperparameter_path
self.hyperparameters = {}
@@ -211,6 +213,10 @@ class EBDataset(BenchmarkDataset):
with open(self.dataset_path, encoding="utf-8") as f:
self.data = [json.loads(i.strip()) for i in f.readlines()]
if self.shuffle:
random.seed(self.random_seed)
random.shuffle(self.data)
def sample(
self,
num_requests: int,
@@ -270,6 +276,10 @@ class EBChatDataset(BenchmarkDataset):
with open(self.dataset_path, encoding="utf-8") as f:
self.data = [json.loads(i.strip()) for i in f.readlines()]
if self.shuffle:
random.seed(self.random_seed)
random.shuffle(self.data)
def sample(
self,
num_requests: int,
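
Both EBDataset and EBChatDataset now shuffle their JSONL records in place when shuffle is set, seeding the RNG with the dataset's random_seed first so a given --seed reproduces the same request order. A minimal sketch of that load-and-shuffle step (the function name and signature are illustrative):

```python
import json
import random


def load_jsonl(dataset_path: str, shuffle: bool = False, random_seed: int = 0) -> list:
    """Read one JSON object per line, optionally shuffling with a fixed seed."""
    with open(dataset_path, encoding="utf-8") as f:
        data = [json.loads(line.strip()) for line in f if line.strip()]
    if shuffle:
        # Seeding immediately before shuffling keeps the order reproducible
        # for a given seed, regardless of earlier uses of the random module.
        random.seed(random_seed)
        random.shuffle(data)
    return data
```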


@@ -317,6 +317,7 @@ async def benchmark(
selected_percentile_metrics: list[str],
selected_percentiles: list[float],
ignore_eos: bool,
debug: bool,
goodput_config_dict: dict[str, float],
max_concurrency: Optional[int],
lora_modules: Optional[Iterable[str]],
@@ -348,6 +349,7 @@ async def benchmark(
output_len=test_output_len,
logprobs=logprobs,
ignore_eos=ignore_eos,
debug=debug,
extra_body=extra_body,
)
@@ -435,6 +437,7 @@ async def benchmark(
api_url=api_url,
output_len=output_len,
logprobs=logprobs,
debug=debug,
ignore_eos=ignore_eos,
extra_body=extra_body,
)
@@ -819,11 +822,13 @@ def main(args: argparse.Namespace):
# For datasets that follow a similar structure, use a mapping.
dataset_mapping = {
"EB": lambda: EBDataset(random_seed=args.seed, dataset_path=args.dataset_path).sample(
"EB": lambda: EBDataset(random_seed=args.seed, dataset_path=args.dataset_path, shuffle=args.shuffle).sample(
num_requests=args.num_prompts,
output_len=args.sharegpt_output_len,
),
"EBChat": lambda: EBChatDataset(random_seed=args.seed, dataset_path=args.dataset_path).sample(
"EBChat": lambda: EBChatDataset(
random_seed=args.seed, dataset_path=args.dataset_path, shuffle=args.shuffle
).sample(
num_requests=args.num_prompts,
output_len=args.sharegpt_output_len,
),
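
Because main() picks the dataset through a name-to-factory mapping, the new shuffle argument only has to be forwarded at the two construction sites above. A simplified, self-contained sketch of that dispatch; the stub class and the args.dataset_name attribute stand in for the real dataset classes and CLI option:

```python
import argparse


class _StubDataset:
    # Stand-in for EBDataset / EBChatDataset with the same constructor arguments.
    def __init__(self, random_seed=0, dataset_path=None, shuffle=False):
        self.random_seed, self.dataset_path, self.shuffle = random_seed, dataset_path, shuffle

    def sample(self, num_requests, output_len=None):
        return [f"request-{i}" for i in range(num_requests)]


def build_requests(args, dataset_cls=_StubDataset):
    # Mirrors dataset_mapping in main(): each entry is a zero-argument factory,
    # so per-dataset keyword arguments (including shuffle) live only here.
    dataset_mapping = {
        "EB": lambda: dataset_cls(
            random_seed=args.seed, dataset_path=args.dataset_path, shuffle=args.shuffle
        ).sample(num_requests=args.num_prompts),
    }
    return dataset_mapping[args.dataset_name]()


if __name__ == "__main__":
    ns = argparse.Namespace(seed=0, dataset_path="data.jsonl", shuffle=True,
                            num_prompts=3, dataset_name="EB")
    print(build_requests(ns))
```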
@@ -883,6 +888,7 @@ def main(args: argparse.Namespace):
selected_percentile_metrics=args.percentile_metrics.split(","),
selected_percentiles=[float(p) for p in args.metric_percentiles.split(",")],
ignore_eos=args.ignore_eos,
debug=args.debug,
goodput_config_dict=goodput_config_dict,
max_concurrency=args.max_concurrency,
lora_modules=args.lora_modules,
@@ -1071,6 +1077,11 @@ if __name__ == "__main__":
"results in a more uniform arrival of requests.",
)
parser.add_argument("--seed", type=int, default=0)
parser.add_argument(
"--shuffle",
action="store_true",
help="shuffle dataset",
)
parser.add_argument(
"--trust-remote-code",
action="store_true",
@@ -1091,6 +1102,11 @@ if __name__ == "__main__":
action="store_true",
help="Specify to save benchmark results to a json file",
)
parser.add_argument(
"--debug",
action="store_true",
help="print debug information (output)",
)
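# Both new options are plain store_true switches, which is what the documentation
# above means by "default False": omitting the flag leaves the attribute False,
# passing it flips it to True. A short sketch of just the new arguments:
#
#     import argparse
#
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--seed", type=int, default=0)
#     parser.add_argument("--shuffle", action="store_true", help="shuffle dataset")
#     parser.add_argument("--debug", action="store_true", help="print debug information (output)")
#
#     args = parser.parse_args(["--shuffle", "--seed", "42"])
#     assert args.shuffle is True and args.debug is False and args.seed == 42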
parser.add_argument(
"--save-detailed",
action="store_true",