Add ci case (#3355)

* add ci cases * debug debug H20 baseline * Update run_pre_ce.sh * Update test_EB_Lite_serving.py * Update test_EB_VL_Lite_serving.py * Update test_EB_Lite_serving_mtp.py * Update test_Qwen3-MoE_serving.py * Update test_Qwen2-7B-Instruct_serving.py * Update run_pre_ce.sh
2025-10-13 04:13:58 +08:00 · 2025-08-18 11:35:56 +08:00
parent e88f5552db
commit 3ee6053e5d
5 changed files with 201 additions and 1 deletions
--- a/test/ci_use/EB_Lite/test_EB_Lite_serving.py
+++ b/test/ci_use/EB_Lite/test_EB_Lite_serving.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import re
 import signal
 import socket
 import subprocess
@@ -998,3 +999,42 @@ def test_streaming_completion_with_bad_words(openai_client, capsys):
            output_tokens_1.append(chunk.choices[0].text)
            output_ids_1.extend(chunk.choices[0].completion_token_ids)
    assert not any(ids in output_ids_1 for ids in bad_token_ids)
 def test_profile_reset_block_num():
    """Check that the profiled ``total_block_num`` stays within 5% of the baseline.

    Scans ``./log/config.log`` for the first line containing
    ``Reset block num``, extracts the integer following ``total_block_num:``
    and asserts that it lies within +/-5% of the hard-coded baseline.

    Fails via ``pytest.fail`` when the log file is missing, when no
    ``Reset block num`` line exists, or when that line carries no
    ``total_block_num:<digits>`` field.
    """
    log_file = "./log/config.log"
    # NOTE(review): presumably measured on the CI hardware; confirm when the machine changes.
    baseline = 31446
    tolerance = 0.05
    if not os.path.exists(log_file):
        pytest.fail(f"Log file not found: {log_file}")
    # Stream the log and stop at the first hit instead of loading every line.
    # An explicit encoding with errors="replace" keeps the scan independent of
    # the CI locale and of undecodable bytes elsewhere in the log.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        target_line = next((line.strip() for line in f if "Reset block num" in line), None)
    if target_line is None:
        pytest.fail("日志中没有Reset block num信息")
    match = re.search(r"total_block_num:(\d+)", target_line)
    if not match:
        pytest.fail(f"Failed to extract total_block_num from line: {target_line}")
    # ``\d+`` only matches decimal digits, which int() always accepts, so no
    # ValueError handling is required here.
    actual_value = int(match.group(1))
    lower_bound = baseline * (1 - tolerance)
    upper_bound = baseline * (1 + tolerance)
    print(f"Reset total_block_num: {actual_value}. baseline: {baseline}")
    # The two message parts are adjacent literals; the explicit ". " keeps them
    # from running together in the failure output.
    assert lower_bound <= actual_value <= upper_bound, (
        f"Reset total_block_num {actual_value} 与 baseline {baseline} diff需要在5%以内. "
        f"Allowed range: [{lower_bound:.1f}, {upper_bound:.1f}]"
    )
--- a/test/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py
+++ b/test/ci_use/EB_Lite_mtp/test_EB_Lite_serving_mtp.py
@@ -14,6 +14,7 @@
 import json
 import os
 import re
 import signal
 import socket
 import subprocess
@@ -344,3 +345,42 @@ def test_streaming(openai_client, capsys):
    for chunk in response:
        output.append(chunk.choices[0].text)
    assert len(output) > 0
 def test_profile_reset_block_num():
    """Check that the profiled ``total_block_num`` stays within 5% of the baseline.

    Scans ``./log/config.log`` for the first line containing
    ``Reset block num``, extracts the integer following ``total_block_num:``
    and asserts that it lies within +/-5% of the hard-coded baseline.

    Fails via ``pytest.fail`` when the log file is missing, when no
    ``Reset block num`` line exists, or when that line carries no
    ``total_block_num:<digits>`` field.
    """
    log_file = "./log/config.log"
    # NOTE(review): presumably measured on the CI hardware; confirm when the machine changes.
    baseline = 30065
    tolerance = 0.05
    if not os.path.exists(log_file):
        pytest.fail(f"Log file not found: {log_file}")
    # Stream the log and stop at the first hit instead of loading every line.
    # An explicit encoding with errors="replace" keeps the scan independent of
    # the CI locale and of undecodable bytes elsewhere in the log.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        target_line = next((line.strip() for line in f if "Reset block num" in line), None)
    if target_line is None:
        pytest.fail("日志中没有Reset block num信息")
    match = re.search(r"total_block_num:(\d+)", target_line)
    if not match:
        pytest.fail(f"Failed to extract total_block_num from line: {target_line}")
    # ``\d+`` only matches decimal digits, which int() always accepts, so no
    # ValueError handling is required here.
    actual_value = int(match.group(1))
    lower_bound = baseline * (1 - tolerance)
    upper_bound = baseline * (1 + tolerance)
    print(f"Reset total_block_num: {actual_value}. baseline: {baseline}")
    # The two message parts are adjacent literals; the explicit ". " keeps them
    # from running together in the failure output.
    assert lower_bound <= actual_value <= upper_bound, (
        f"Reset total_block_num {actual_value} 与 baseline {baseline} diff需要在5%以内. "
        f"Allowed range: [{lower_bound:.1f}, {upper_bound:.1f}]"
    )
--- a/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
+++ b/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
@@ -14,6 +14,7 @@
 import json
 import os
 import re
 import signal
 import socket
 import subprocess
@@ -535,3 +536,42 @@ def test_chat_with_thinking(openai_client, capsys):
        total_tokens += len(delta_message.completion_token_ids)
    assert completion_tokens + reasoning_tokens == total_tokens
    assert reasoning_tokens <= reasoning_max_tokens
 def test_profile_reset_block_num():
    """Check that the profiled ``total_block_num`` stays within 5% of the baseline.

    Scans ``./log/config.log`` for the first line containing
    ``Reset block num``, extracts the integer following ``total_block_num:``
    and asserts that it lies within +/-5% of the hard-coded baseline.

    Fails via ``pytest.fail`` when the log file is missing, when no
    ``Reset block num`` line exists, or when that line carries no
    ``total_block_num:<digits>`` field.
    """
    log_file = "./log/config.log"
    # NOTE(review): round value; looks like it may be a cap or placeholder
    # rather than a measured figure. Confirm against the CI hardware.
    baseline = 40000
    tolerance = 0.05
    if not os.path.exists(log_file):
        pytest.fail(f"Log file not found: {log_file}")
    # Stream the log and stop at the first hit instead of loading every line.
    # An explicit encoding with errors="replace" keeps the scan independent of
    # the CI locale and of undecodable bytes elsewhere in the log.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        target_line = next((line.strip() for line in f if "Reset block num" in line), None)
    if target_line is None:
        pytest.fail("日志中没有Reset block num信息")
    match = re.search(r"total_block_num:(\d+)", target_line)
    if not match:
        pytest.fail(f"Failed to extract total_block_num from line: {target_line}")
    # ``\d+`` only matches decimal digits, which int() always accepts, so no
    # ValueError handling is required here.
    actual_value = int(match.group(1))
    lower_bound = baseline * (1 - tolerance)
    upper_bound = baseline * (1 + tolerance)
    print(f"Reset total_block_num: {actual_value}. baseline: {baseline}")
    # The two message parts are adjacent literals; the explicit ". " keeps them
    # from running together in the failure output.
    assert lower_bound <= actual_value <= upper_bound, (
        f"Reset total_block_num {actual_value} 与 baseline {baseline} diff需要在5%以内. "
        f"Allowed range: [{lower_bound:.1f}, {upper_bound:.1f}]"
    )
--- a/test/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py
+++ b/test/ci_use/Qwen2-7B-Instruct_serving/test_Qwen2-7B-Instruct_serving.py
@@ -15,6 +15,7 @@
 import concurrent.futures
 import json
 import os
 import re
 import signal
 import socket
 import subprocess
@@ -599,3 +600,42 @@ def test_streaming(openai_client, capsys):
    for chunk in response:
        output.append(chunk.choices[0].text)
    assert len(output) > 0
 def test_profile_reset_block_num():
    """Check that the profiled ``total_block_num`` stays within 5% of the baseline.

    Scans ``./log/config.log`` for the first line containing
    ``Reset block num``, extracts the integer following ``total_block_num:``
    and asserts that it lies within +/-5% of the hard-coded baseline.

    Fails via ``pytest.fail`` when the log file is missing, when no
    ``Reset block num`` line exists, or when that line carries no
    ``total_block_num:<digits>`` field.
    """
    log_file = "./log/config.log"
    # NOTE(review): presumably measured on the CI hardware; confirm when the machine changes.
    baseline = 32562
    tolerance = 0.05
    if not os.path.exists(log_file):
        pytest.fail(f"Log file not found: {log_file}")
    # Stream the log and stop at the first hit instead of loading every line.
    # An explicit encoding with errors="replace" keeps the scan independent of
    # the CI locale and of undecodable bytes elsewhere in the log.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        target_line = next((line.strip() for line in f if "Reset block num" in line), None)
    if target_line is None:
        pytest.fail("日志中没有Reset block num信息")
    match = re.search(r"total_block_num:(\d+)", target_line)
    if not match:
        pytest.fail(f"Failed to extract total_block_num from line: {target_line}")
    # ``\d+`` only matches decimal digits, which int() always accepts, so no
    # ValueError handling is required here.
    actual_value = int(match.group(1))
    lower_bound = baseline * (1 - tolerance)
    upper_bound = baseline * (1 + tolerance)
    print(f"Reset total_block_num: {actual_value}. baseline: {baseline}")
    # The two message parts are adjacent literals; the explicit ". " keeps them
    # from running together in the failure output.
    assert lower_bound <= actual_value <= upper_bound, (
        f"Reset total_block_num {actual_value} 与 baseline {baseline} diff需要在5%以内. "
        f"Allowed range: [{lower_bound:.1f}, {upper_bound:.1f}]"
    )
--- a/test/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py
+++ b/test/ci_use/Qwen3-MoE/test_Qwen3-MoE_serving.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import re
 import signal
 import socket
 import subprocess
@@ -116,7 +117,7 @@ def setup_and_run_server():
        )
    # Wait for the API server to be ready (up to 480 polling attempts)
-    for _ in range(300):
+    for _ in range(480):
        if is_port_open("127.0.0.1", FD_API_PORT):
            print(f"API server is up on port {FD_API_PORT}")
            break
@@ -297,3 +298,42 @@ def test_non_thinking_prompt(api_url, headers):
    assert not any(
        x in content for x in ["根据", "我认为", "推测", "可能"]
    ), "Expected no reasoning in non-thinking response"
 def test_profile_reset_block_num():
    """Check that the profiled ``total_block_num`` stays within 5% of the baseline.

    Scans ``./log/config.log`` for the first line containing
    ``Reset block num``, extracts the integer following ``total_block_num:``
    and asserts that it lies within +/-5% of the hard-coded baseline.

    Fails via ``pytest.fail`` when the log file is missing, when no
    ``Reset block num`` line exists, or when that line carries no
    ``total_block_num:<digits>`` field.
    """
    log_file = "./log/config.log"
    # NOTE(review): presumably measured on the CI hardware; confirm when the machine changes.
    baseline = 17864
    tolerance = 0.05
    if not os.path.exists(log_file):
        pytest.fail(f"Log file not found: {log_file}")
    # Stream the log and stop at the first hit instead of loading every line.
    # An explicit encoding with errors="replace" keeps the scan independent of
    # the CI locale and of undecodable bytes elsewhere in the log.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        target_line = next((line.strip() for line in f if "Reset block num" in line), None)
    if target_line is None:
        pytest.fail("日志中没有Reset block num信息")
    match = re.search(r"total_block_num:(\d+)", target_line)
    if not match:
        pytest.fail(f"Failed to extract total_block_num from line: {target_line}")
    # ``\d+`` only matches decimal digits, which int() always accepts, so no
    # ValueError handling is required here.
    actual_value = int(match.group(1))
    lower_bound = baseline * (1 - tolerance)
    upper_bound = baseline * (1 + tolerance)
    print(f"Reset total_block_num: {actual_value}. baseline: {baseline}")
    # The two message parts are adjacent literals; the explicit ". " keeps them
    # from running together in the failure output.
    assert lower_bound <= actual_value <= upper_bound, (
        f"Reset total_block_num {actual_value} 与 baseline {baseline} diff需要在5%以内. "
        f"Allowed range: [{lower_bound:.1f}, {upper_bound:.1f}]"
    )