From 17731a8acd60985e8c8c5fd36a07fd6b6d4f976b Mon Sep 17 00:00:00 2001
From: Divano <dddivano@outlook.com>
Date: Thu, 28 Aug 2025 18:30:19 +0800
Subject: [PATCH] add concurrency cases (#3689)

---
 tests/ce/server/test_max_concurrency.py  | 54 ++++++++++++++++++++
 tests/ce/server/test_max_waiting_time.py | 64 ++++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 tests/ce/server/test_max_concurrency.py
 create mode 100644 tests/ce/server/test_max_waiting_time.py

diff --git a/tests/ce/server/test_max_concurrency.py b/tests/ce/server/test_max_concurrency.py
new file mode 100644
index 000000000..9cad9e800
--- /dev/null
+++ b/tests/ce/server/test_max_concurrency.py
@@ -0,0 +1,54 @@
+#!/bin/env python3
+# -*- coding: utf-8 -*-
+# @author DDDivano
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+
+
+import concurrent.futures
+from collections import Counter
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def test_concurrency():
+    """
+    Concurrency test (the flags below are presumably the server launch options -- TODO confirm):
+    Send 10 requests at once and check that the status codes are five 200s and five 429s.
+    --max-num-seqs 128 \
+  --tensor-parallel-size 1 \
+  --max-concurrency 5 \
+    """
+
+    data = {
+        "stream": False,
+        "messages": [
+            {"role": "user", "content": "1+1=？ 直接回答"},
+        ],
+        "max_tokens": 1000,
+        "temperature": 0.8,
+        "top_p": 0,
+    }
+
+    def send_one_request(i):  # send one request built from `data`; return its status code
+        payload = build_request_payload(TEMPLATE, data)
+        response = send_request(URL, payload)
+        print(f"请求 {i} 返回码: {response.status_code}")
+        return response.status_code
+
+    # Run 10 requests concurrently (one worker thread per request)
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        futures = [executor.submit(send_one_request, i) for i in range(10)]
+        results = [f.result() for f in futures]  # waits for each future, in submission order
+
+    # Tally the status codes
+    counter = Counter(results)
+    count_200 = counter.get(200, 0)
+    count_429 = counter.get(429, 0)
+
+    print(f"统计结果: 200={count_200}, 429={count_429}, 全部结果={results}")
+
+    # There must be exactly five 200s and five 429s
+    assert count_200 == 5, f"200 数量错误: {count_200}"
+    assert count_429 == 5, f"429 数量错误: {count_429}"
+
+    print("并发请求校验通过")
diff --git a/tests/ce/server/test_max_waiting_time.py b/tests/ce/server/test_max_waiting_time.py
new file mode 100644
index 000000000..63711c6a2
--- /dev/null
+++ b/tests/ce/server/test_max_waiting_time.py
@@ -0,0 +1,64 @@
+#!/bin/env python3
+# -*- coding: utf-8 -*-
+# @author DDDivano
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+
+
+import concurrent.futures
+from collections import Counter
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def test_waiting_time():
+    """
+    Concurrency test (the flags below are presumably the server launch options -- TODO confirm):
+    Send 1333 requests at once.
+    Tally all status codes; their counts must sum to 1333.
+    Extra check: the number of 200s must be at least 1024 and less than 1333.
+    --max-num-seqs 128 \
+  --tensor-parallel-size 1 \
+  --max-concurrency 5000 \
+  --max-waiting-time 1 \
+    """
+
+    data = {
+        "stream": False,
+        "messages": [
+            {"role": "user", "content": "1+1=？ 直接回答"},
+        ],
+        "max_tokens": 10000,
+        "metadata": {
+            "min_tokens": 99,
+        },
+        "temperature": 0.8,
+        "top_p": 0,
+    }
+
+    def send_one_request(i):  # send one request built from `data`; return its status code
+        payload = build_request_payload(TEMPLATE, data)
+        response = send_request(URL, payload)
+        print(f"请求 {i} 返回码: {response.status_code}")
+        return response.status_code
+
+    # Run 1333 requests concurrently (one worker thread per request)
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1333) as executor:
+        futures = [executor.submit(send_one_request, i) for i in range(1333)]
+        results = [f.result() for f in futures]  # waits for each future, in submission order
+
+    # Tally all status codes
+    counter = Counter(results)
+    print("返回码统计结果:")
+    for code, cnt in sorted(counter.items()):
+        print(f"  {code}: {cnt}")
+
+    # Check the total number of responses
+    total = sum(counter.values())
+    assert total == 1333, f"返回数量不一致，总数={total}, 期望=1333"
+
+    # The number of 200s must fall in the range [1024, 1333)
+    count_200 = counter.get(200, 0)
+    assert count_200 < 1333, f"200 数量错误，应小于1333，实际={count_200}"
+    assert count_200 >= 1024, f"200 数量错误，应大于等于1024，实际={count_200}"
+
+    print("并发请求校验通过")