[CI] Add CI case for MTP accept ratio (#5570)

* Implement test for MTP accept ratio

Add test for MTP accept ratio with assertions on results and metrics.

* Update test_ernie_21b_mtp.py

* Refactor test_mtp_accept_ratio for baseline comparison

Refactor test_mtp_accept_ratio to compare results against baseline file and metrics.

* Fix formatting issues in test_ernie_21b_mtp.py

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
Zhang Yulong
2025-12-17 21:35:02 +08:00
committed by GitHub
parent ac731653b3
commit 94be5ebdd1

View File

@@ -297,3 +297,71 @@ def test_non_chat_usage_non_stream(api_url):
assert payload["max_tokens"] >= usage["completion_tokens"], "completion_tokens大于max_tokens"
assert payload["metadata"]["min_tokens"] <= usage["completion_tokens"], "completion_tokens小于min_tokens"
assert usage["total_tokens"] == total_tokens, "total_tokens不等于prompt_tokens + completion_tokens"
def test_mtp_accept_ratio(api_url):
"""测试mtp接受率"""
payload = {
"model": "default",
"messages": [
{
"role": "user",
"content": "国外项目风险管理研究起步较早,理论体系成熟。早期研究集中于保险与金融领域,后逐步扩展至工程项目、"
"公共管理等多领域。在理论层面COSO《企业风险管理——整合框架》和ISO31000标准为风险管理提供了系统性"
"指导,强调风险识别、评估、应对与监控的全流程管理。风险识别方法包括故障树分析、事件树分析等;风险评估"
"则广泛应用VaR模型、蒙特卡洛模拟等量化工具。应对策略涵盖规避、转移、减轻和接受等并衍生出风险共享、"
"升级等复杂策略。此外,组织文化、管理层支持等因素对风险管理有效性影响显著。近年来,随着科技发展,"
"人工智能、大数据等技术被引入风险管理,推动其向智能化、自动化方向发展。请介绍一下国外关于项目风险管理"
"的文献研究综述300字以内",
},
],
"stream": True,
"stream_options": {"include_usage": True, "continuous_usage_stats": True},
"temperature": 0,
"seed": 23,
"top_p": 0,
}
print("fastdeploy answer is :")
try:
# TODO: 第一次和第二次存在diff后面正常暂时多请求一次
response = send_request(url=api_url, payload=payload)
chunks = get_stream_chunks(response)
response = send_request(url=api_url, payload=payload)
chunks = get_stream_chunks(response)
for idx, chunk in enumerate(chunks):
print(f"\nchunk[{idx}]:\n{json.dumps(chunk, ensure_ascii=False)}")
result = "".join([x["choices"][0]["delta"]["content"] for x in chunks[:-1]])
speculate_metrics = chunks[-2]["choices"][0]["speculate_metrics"]
except Exception as e:
print(f"解析失败: {e}")
print("\nresult:\n", result)
base_path = os.getenv("MODEL_PATH")
baseline_path = os.path.join(base_path, "21b_mtp_accept_ratio_baseline.txt")
with open(baseline_path, "r", encoding="utf-8") as f:
baseline = f.read()
baseline_ratio = {
"accepted_tokens": 131,
"rejected_tokens": 23,
"accept_ratio": 0.4122137404580153,
"average_accept_length": 1.7012987012987013,
"accept_ratio_per_head": [0.7012987012987013],
}
response = send_request(url=api_url, payload=payload)
chunks = get_stream_chunks(response)
result_2 = "".join([x["choices"][0]["delta"]["content"] for x in chunks[:-1]])
speculate_metrics_2 = chunks[-2]["choices"][0]["speculate_metrics"]
print("chunks:", chunks[-2])
print("baseline", speculate_metrics)
print("speculate_metrics_2", speculate_metrics_2)
assert result_2 == baseline, f"与baseline存在diffresult_2: {result}\n baseline: {baseline}"
assert speculate_metrics_2 == baseline_ratio, (
f"speculate_metrics存在diff" f"speculate_metrics_2: {speculate_metrics_2}\n " f"baseline: {baseline_ratio}"
)
assert speculate_metrics_2["accept_ratio"] > 0, "accept_ratio异常"
prompt_tokens = chunks[-1]["usage"]["prompt_tokens"]
cached_tokens = chunks[-1]["usage"]["prompt_tokens_details"]["cached_tokens"]
assert cached_tokens == prompt_tokens // 64 * 64, "cached_tokens数量有问题"