mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[CI] Add CI case for MTP accept ratio (#5570)
* Implement test for MTP accept ratio Add test for MTP accept ratio with assertions on results and metrics. * Update test_ernie_21b_mtp.py * Refactor test_mtp_accept_ratio for baseline comparison Refactor test_mtp_accept_ratio to compare results against baseline file and metrics. * Fix formatting issues in test_ernie_21b_mtp.py --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
@@ -297,3 +297,71 @@ def test_non_chat_usage_non_stream(api_url):
|
||||
assert payload["max_tokens"] >= usage["completion_tokens"], "completion_tokens大于max_tokens"
|
||||
assert payload["metadata"]["min_tokens"] <= usage["completion_tokens"], "completion_tokens小于min_tokens"
|
||||
assert usage["total_tokens"] == total_tokens, "total_tokens不等于prompt_tokens + completion_tokens"
|
||||
|
||||
|
||||
def test_mtp_accept_ratio(api_url):
|
||||
"""测试mtp接受率"""
|
||||
payload = {
|
||||
"model": "default",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "国外项目风险管理研究起步较早,理论体系成熟。早期研究集中于保险与金融领域,后逐步扩展至工程项目、"
|
||||
"公共管理等多领域。在理论层面,COSO《企业风险管理——整合框架》和ISO31000标准为风险管理提供了系统性"
|
||||
"指导,强调风险识别、评估、应对与监控的全流程管理。风险识别方法包括故障树分析、事件树分析等;风险评估"
|
||||
"则广泛应用VaR模型、蒙特卡洛模拟等量化工具。应对策略涵盖规避、转移、减轻和接受等,并衍生出风险共享、"
|
||||
"升级等复杂策略。此外,组织文化、管理层支持等因素对风险管理有效性影响显著。近年来,随着科技发展,"
|
||||
"人工智能、大数据等技术被引入风险管理,推动其向智能化、自动化方向发展。请介绍一下国外关于项目风险管理"
|
||||
"的文献研究综述,300字以内",
|
||||
},
|
||||
],
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True, "continuous_usage_stats": True},
|
||||
"temperature": 0,
|
||||
"seed": 23,
|
||||
"top_p": 0,
|
||||
}
|
||||
|
||||
print("fastdeploy answer is :")
|
||||
|
||||
try:
|
||||
# TODO: 第一次和第二次存在diff,后面正常,暂时多请求一次
|
||||
response = send_request(url=api_url, payload=payload)
|
||||
chunks = get_stream_chunks(response)
|
||||
response = send_request(url=api_url, payload=payload)
|
||||
chunks = get_stream_chunks(response)
|
||||
for idx, chunk in enumerate(chunks):
|
||||
print(f"\nchunk[{idx}]:\n{json.dumps(chunk, ensure_ascii=False)}")
|
||||
result = "".join([x["choices"][0]["delta"]["content"] for x in chunks[:-1]])
|
||||
speculate_metrics = chunks[-2]["choices"][0]["speculate_metrics"]
|
||||
except Exception as e:
|
||||
print(f"解析失败: {e}")
|
||||
print("\nresult:\n", result)
|
||||
|
||||
base_path = os.getenv("MODEL_PATH")
|
||||
baseline_path = os.path.join(base_path, "21b_mtp_accept_ratio_baseline.txt")
|
||||
with open(baseline_path, "r", encoding="utf-8") as f:
|
||||
baseline = f.read()
|
||||
baseline_ratio = {
|
||||
"accepted_tokens": 131,
|
||||
"rejected_tokens": 23,
|
||||
"accept_ratio": 0.4122137404580153,
|
||||
"average_accept_length": 1.7012987012987013,
|
||||
"accept_ratio_per_head": [0.7012987012987013],
|
||||
}
|
||||
|
||||
response = send_request(url=api_url, payload=payload)
|
||||
chunks = get_stream_chunks(response)
|
||||
result_2 = "".join([x["choices"][0]["delta"]["content"] for x in chunks[:-1]])
|
||||
speculate_metrics_2 = chunks[-2]["choices"][0]["speculate_metrics"]
|
||||
print("chunks:", chunks[-2])
|
||||
print("baseline", speculate_metrics)
|
||||
print("speculate_metrics_2", speculate_metrics_2)
|
||||
assert result_2 == baseline, f"与baseline存在diff,result_2: {result}\n baseline: {baseline}"
|
||||
assert speculate_metrics_2 == baseline_ratio, (
|
||||
f"speculate_metrics存在diff," f"speculate_metrics_2: {speculate_metrics_2}\n " f"baseline: {baseline_ratio}"
|
||||
)
|
||||
assert speculate_metrics_2["accept_ratio"] > 0, "accept_ratio异常"
|
||||
prompt_tokens = chunks[-1]["usage"]["prompt_tokens"]
|
||||
cached_tokens = chunks[-1]["usage"]["prompt_tokens_details"]["cached_tokens"]
|
||||
assert cached_tokens == prompt_tokens // 64 * 64, "cached_tokens数量有问题"
|
||||
|
||||
Reference in New Issue
Block a user