From 17731a8acd60985e8c8c5fd36a07fd6b6d4f976b Mon Sep 17 00:00:00 2001
From: Divano
Date: Thu, 28 Aug 2025 18:30:19 +0800
Subject: [PATCH] add concurrency cases (#3689)

---
 tests/ce/server/test_max_concurrency.py  | 54 ++++++++++++++++++++
 tests/ce/server/test_max_waiting_time.py | 64 ++++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 tests/ce/server/test_max_concurrency.py
 create mode 100644 tests/ce/server/test_max_waiting_time.py

diff --git a/tests/ce/server/test_max_concurrency.py b/tests/ce/server/test_max_concurrency.py
new file mode 100644
index 000000000..9cad9e800
--- /dev/null
+++ b/tests/ce/server/test_max_concurrency.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# @author DDDivano
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+
+
+import concurrent.futures
+from collections import Counter
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def test_concurrency():
+    """
+    Concurrency test:
+    Send 10 requests at once and check that exactly 5 return 200 and 5 return 429.
+    --max-num-seqs 128 \
+    --tensor-parallel-size 1 \
+    --max-concurrency 5 \
+    """
+
+    data = {
+        "stream": False,
+        "messages": [
+            {"role": "user", "content": "1+1=? \n直接回答"},
+        ],
+        "max_tokens": 1000,
+        "temperature": 0.8,
+        "top_p": 0,
+    }
+
+    def send_one_request(i):
+        payload = build_request_payload(TEMPLATE, data)
+        response = send_request(URL, payload)
+        print(f"请求 {i} 返回码: {response.status_code}")
+        return response.status_code
+
+    # Submit all 10 requests to a 10-worker pool so they run concurrently
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        futures = [executor.submit(send_one_request, i) for i in range(10)]
+        results = [f.result() for f in futures]
+
+    # Tally the returned status codes
+    counter = Counter(results)
+    count_200 = counter.get(200, 0)
+    count_429 = counter.get(429, 0)
+
+    print(f"统计结果: 200={count_200}, 429={count_429}, 全部结果={results}")
+
+    # There must be exactly 5 responses with 200 and 5 with 429
+    assert count_200 == 5, f"200 数量错误: {count_200}"
+    assert count_429 == 5, f"429 数量错误: {count_429}"
+
+    print("并发请求校验通过")
diff --git a/tests/ce/server/test_max_waiting_time.py b/tests/ce/server/test_max_waiting_time.py
new file mode 100644
index 000000000..63711c6a2
--- /dev/null
+++ b/tests/ce/server/test_max_waiting_time.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# @author DDDivano
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+
+
+import concurrent.futures
+from collections import Counter
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def test_waiting_time():
+    """
+    Concurrency test:
+    Send 1333 requests at once.
+    Tally every returned status code; the counts must add up to 1333.
+    Additionally, the number of 200 responses must be >= 1024 and < 1333.
+    --max-num-seqs 128 \
+    --tensor-parallel-size 1 \
+    --max-concurrency 5000 \
+    --max-waiting-time 1 \
+    """
+
+    data = {
+        "stream": False,
+        "messages": [
+            {"role": "user", "content": "1+1=? \n直接回答"},
+        ],
+        "max_tokens": 10000,
+        "metadata": {
+            "min_tokens": 99,
+        },
+        "temperature": 0.8,
+        "top_p": 0,
+    }
+
+    def send_one_request(i):
+        payload = build_request_payload(TEMPLATE, data)
+        response = send_request(URL, payload)
+        print(f"请求 {i} 返回码: {response.status_code}")
+        return response.status_code
+
+    # Submit all 1333 requests to a 1333-worker pool so they run concurrently
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1333) as executor:
+        futures = [executor.submit(send_one_request, i) for i in range(1333)]
+        results = [f.result() for f in futures]
+
+    # Tally all returned status codes
+    counter = Counter(results)
+    print("返回码统计结果:")
+    for code, cnt in sorted(counter.items()):
+        print(f"  {code}: {cnt}")
+
+    # Check that the number of collected responses equals the number sent
+    total = sum(counter.values())
+    assert total == 1333, f"返回数量不一致,总数={total}, 期望=1333"
+
+    # The 200 count must be at least 1024 and strictly below 1333
+    count_200 = counter.get(200, 0)
+    assert count_200 < 1333, f"200 数量错误,应小于1333,实际={count_200}"
+    assert count_200 >= 1024, f"200 数量错误,应大于等于1024,实际={count_200}"
+
+    print("并发请求校验通过")