Update benchmark tools (#3004)

* update benchmark tools

* update benchmark tools
Zhang Yulong
2025-07-24 15:19:23 +08:00
committed by GitHub
parent f935d6f862
commit 5151bc92c8
4 changed files with 41 additions and 7 deletions


@@ -41,7 +41,10 @@ python -m pip install -r requirements.txt
--metric-percentiles 80,95,99,99.9,99.95,99.99: percentiles of the performance metrics reported in the results
--num-prompts 1: total number of requests to send
--max-concurrency 1: number of concurrent requests for the benchmark
--save-result: enable result saving; results are written to a JSON file
--save-result: enable result saving; results are written to a JSON file (default False, not saved)
--debug: enable debug mode, printing each payload and output (default False)
--shuffle: whether to shuffle the dataset (default False, no shuffling)
--seed: random seed used when shuffling the dataset (default 0)
```
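
Putting the new flags together, a shuffled, debug-enabled run might look like the following. This is a hypothetical invocation: the entry script name and the dataset path are placeholders, and only the flags themselves come from the list above.

```bash
# Hypothetical invocation; the script name and dataset path are placeholders.
python benchmark_serving.py \
    --dataset-path ./data/requests.jsonl \
    --num-prompts 100 \
    --max-concurrency 8 \
    --metric-percentiles 80,95,99,99.9,99.95,99.99 \
    --shuffle --seed 42 \
    --debug \
    --save-result
```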
##### Benchmarking the /v1/chat/completions endpoint: single-record debugging


@@ -50,6 +50,7 @@ class RequestFuncInput:
multi_modal_content: Optional[dict] = None
ignore_eos: bool = False
language: Optional[str] = None
debug: bool = False
@dataclass
@@ -98,7 +99,8 @@ async def async_request_eb_openai_chat_completions(
if request_func_input.ignore_eos:
payload["ignore_eos"] = request_func_input.ignore_eos
print(f"payload:{json.dumps(payload, ensure_ascii=False)}")
if request_func_input.debug:
print(f"payload:{json.dumps(payload, ensure_ascii=False)}")
headers = {
"Content-Type": "application/json",
@@ -179,7 +181,8 @@ async def async_request_eb_openai_chat_completions(
f.write(str(output) + "\n")
if pbar:
pbar.update(1)
print("#####final_output:", output)
if request_func_input.debug:
print("#####final_output:", output)
return output
@@ -209,7 +212,8 @@ async def async_request_eb_openai_completions(
if request_func_input.ignore_eos:
payload["ignore_eos"] = request_func_input.ignore_eos
print("payload:", json.dumps(payload, ensure_ascii=False))
if request_func_input.debug:
print("payload:", json.dumps(payload, ensure_ascii=False))
headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
@@ -288,7 +292,8 @@ async def async_request_eb_openai_completions(
exc_info = sys.exc_info()
output.error = "".join(traceback.format_exception(*exc_info))
print(f"final_output:{output}")
if request_func_input.debug:
print(f"final_output:{output}")
if pbar:
pbar.update(1)
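
Taken together, these hunks thread a per-request debug switch into RequestFuncInput and gate every payload/output dump behind it, so benchmark runs stay quiet unless --debug is passed. A minimal, self-contained sketch of the pattern (the field names mirror the diff; the payload shape and helper function are illustrative only):

```python
import json
from dataclasses import dataclass
from typing import Optional


@dataclass
class RequestFuncInput:
    prompt: str
    api_url: str
    ignore_eos: bool = False
    language: Optional[str] = None
    debug: bool = False  # new: controls verbose per-request logging


def build_payload(req: RequestFuncInput) -> dict:
    """Build a chat-completions style payload, dumping it only in debug mode."""
    payload = {"messages": [{"role": "user", "content": req.prompt}]}
    if req.ignore_eos:
        payload["ignore_eos"] = req.ignore_eos
    if req.debug:
        # Printed only when --debug was passed; silent by default.
        print(f"payload:{json.dumps(payload, ensure_ascii=False)}")
    return payload


if __name__ == "__main__":
    req = RequestFuncInput(
        prompt="hello",
        api_url="http://localhost:8000/v1/chat/completions",
        debug=True,
    )
    build_payload(req)
```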


@@ -57,6 +57,7 @@ class BenchmarkDataset(ABC):
self,
dataset_path: Optional[str] = None,
random_seed: int = DEFAULT_SEED,
shuffle: bool = False,
hyperparameter_path: Optional[str] = None,
) -> None:
"""
@@ -72,6 +73,7 @@ class BenchmarkDataset(ABC):
# default seed.
self.random_seed = random_seed if random_seed is not None else self.DEFAULT_SEED
self.data = None
self.shuffle = shuffle
self.hyperparameter_path = hyperparameter_path
self.hyperparameters = {}
@@ -211,6 +213,10 @@ class EBDataset(BenchmarkDataset):
with open(self.dataset_path, encoding="utf-8") as f:
self.data = [json.loads(i.strip()) for i in f.readlines()]
if self.shuffle:
random.seed(self.random_seed)
random.shuffle(self.data)
def sample(
self,
num_requests: int,
@@ -270,6 +276,10 @@ class EBChatDataset(BenchmarkDataset):
with open(self.dataset_path, encoding="utf-8") as f:
self.data = [json.loads(i.strip()) for i in f.readlines()]
if self.shuffle:
random.seed(self.random_seed)
random.shuffle(self.data)
def sample(
self,
num_requests: int,
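
Both EBDataset and EBChatDataset now shuffle their JSONL records in place when shuffle is set, seeding the RNG with the dataset's random_seed first so a given --seed reproduces the same request order. A minimal sketch of that load-and-shuffle step (the function name and signature are illustrative):

```python
import json
import random


def load_jsonl(dataset_path: str, shuffle: bool = False, random_seed: int = 0) -> list:
    """Read one JSON object per line, optionally shuffling with a fixed seed."""
    with open(dataset_path, encoding="utf-8") as f:
        data = [json.loads(line.strip()) for line in f if line.strip()]
    if shuffle:
        # Seeding immediately before shuffling keeps the order reproducible
        # for a given seed, regardless of earlier uses of the random module.
        random.seed(random_seed)
        random.shuffle(data)
    return data
```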


@@ -317,6 +317,7 @@ async def benchmark(
selected_percentile_metrics: list[str],
selected_percentiles: list[float],
ignore_eos: bool,
debug: bool,
goodput_config_dict: dict[str, float],
max_concurrency: Optional[int],
lora_modules: Optional[Iterable[str]],
@@ -348,6 +349,7 @@ async def benchmark(
output_len=test_output_len,
logprobs=logprobs,
ignore_eos=ignore_eos,
debug=debug,
extra_body=extra_body,
)
@@ -435,6 +437,7 @@ async def benchmark(
api_url=api_url,
output_len=output_len,
logprobs=logprobs,
debug=debug,
ignore_eos=ignore_eos,
extra_body=extra_body,
)
@@ -819,11 +822,13 @@ def main(args: argparse.Namespace):
# For datasets that follow a similar structure, use a mapping.
dataset_mapping = {
"EB": lambda: EBDataset(random_seed=args.seed, dataset_path=args.dataset_path).sample(
"EB": lambda: EBDataset(random_seed=args.seed, dataset_path=args.dataset_path, shuffle=args.shuffle).sample(
num_requests=args.num_prompts,
output_len=args.sharegpt_output_len,
),
"EBChat": lambda: EBChatDataset(random_seed=args.seed, dataset_path=args.dataset_path).sample(
"EBChat": lambda: EBChatDataset(
random_seed=args.seed, dataset_path=args.dataset_path, shuffle=args.shuffle
).sample(
num_requests=args.num_prompts,
output_len=args.sharegpt_output_len,
),
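
Because main() picks the dataset through a name-to-factory mapping, the new shuffle argument only has to be forwarded at the two construction sites above. A simplified, self-contained sketch of that dispatch; the stub class and the args.dataset_name attribute stand in for the real dataset classes and CLI option:

```python
import argparse


class _StubDataset:
    # Stand-in for EBDataset / EBChatDataset with the same constructor arguments.
    def __init__(self, random_seed=0, dataset_path=None, shuffle=False):
        self.random_seed, self.dataset_path, self.shuffle = random_seed, dataset_path, shuffle

    def sample(self, num_requests, output_len=None):
        return [f"request-{i}" for i in range(num_requests)]


def build_requests(args, dataset_cls=_StubDataset):
    # Mirrors dataset_mapping in main(): each entry is a zero-argument factory,
    # so per-dataset keyword arguments (including shuffle) live only here.
    dataset_mapping = {
        "EB": lambda: dataset_cls(
            random_seed=args.seed, dataset_path=args.dataset_path, shuffle=args.shuffle
        ).sample(num_requests=args.num_prompts),
    }
    return dataset_mapping[args.dataset_name]()


if __name__ == "__main__":
    ns = argparse.Namespace(seed=0, dataset_path="data.jsonl", shuffle=True,
                            num_prompts=3, dataset_name="EB")
    print(build_requests(ns))
```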
@@ -883,6 +888,7 @@ def main(args: argparse.Namespace):
selected_percentile_metrics=args.percentile_metrics.split(","),
selected_percentiles=[float(p) for p in args.metric_percentiles.split(",")],
ignore_eos=args.ignore_eos,
debug=args.debug,
goodput_config_dict=goodput_config_dict,
max_concurrency=args.max_concurrency,
lora_modules=args.lora_modules,
@@ -1071,6 +1077,11 @@ if __name__ == "__main__":
"results in a more uniform arrival of requests.",
)
parser.add_argument("--seed", type=int, default=0)
parser.add_argument(
"--shuffle",
action="store_true",
help="shuffle dataset",
)
parser.add_argument(
"--trust-remote-code",
action="store_true",
@@ -1091,6 +1102,11 @@ if __name__ == "__main__":
action="store_true",
help="Specify to save benchmark results to a json file",
)
parser.add_argument(
"--debug",
action="store_true",
help="print debug information (output)",
)
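# Both new options are plain store_true switches, which is what the documentation
# above means by "default False": omitting the flag leaves the attribute False,
# passing it flips it to True. A short sketch of just the new arguments:
#
#     import argparse
#
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--seed", type=int, default=0)
#     parser.add_argument("--shuffle", action="store_true", help="shuffle dataset")
#     parser.add_argument("--debug", action="store_true", help="print debug information (output)")
#
#     args = parser.parse_args(["--shuffle", "--seed", "42"])
#     assert args.shuffle is True and args.debug is False and args.seed == 42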
parser.add_argument(
"--save-detailed",
action="store_true",