* update CI cases

* update CI cases

* update CI cases

* update CI cases

* Merge upstream/develop and resolve directory rename conflict

* Merge upstream/develop and resolve directory rename conflict

* Merge upstream/develop and resolve directory rename conflict

* update deploy

* update deploy

* update deploy

* update deploy

* update deploy
This commit is contained in:
Zhang Yulong
2025-08-21 16:49:20 +08:00
committed by GitHub
parent c83381d650
commit b7eee3aec1
13 changed files with 150 additions and 392 deletions

View File

@@ -18,8 +18,18 @@ TOKEN_LOGPROB = {
"max_tokens": 10000,
}
# Request template: temperature 0, top_p 0, fixed seed 33, streaming enabled,
# and at most 10000 generated tokens.
TOKEN_NORMAL = dict(
    model="default",
    temperature=0,
    top_p=0,
    seed=33,
    stream=True,
    max_tokens=10000,
)
# Registry mapping a template name to its request-parameter dict.
# Register further templates here, e.g. ANOTHER_TEMPLATE=ANOTHER_TEMPLATE.
TEMPLATES = dict(
    TOKEN_LOGPROB=TOKEN_LOGPROB,
    TOKEN_NORMAL=TOKEN_NORMAL,
)

View File

@@ -5,6 +5,7 @@
import json
import pytest
from core import TEMPLATE, URL, build_request_payload, get_stream_chunks, send_request
@@ -44,7 +45,7 @@ def test_seed_stream():
assert l1 == l2, f"top_p=0, 固定seed, logprobs[{idx}]不一致"
def test_usage_stream():
def test_chat_usage_stream():
"""测试payload max_tokens参数"""
data = {
"messages": [
@@ -65,9 +66,103 @@ def test_usage_stream():
usage = chunks[-1]["usage"]
total_tokens = usage["completion_tokens"] + usage["prompt_tokens"]
assert data["max_tokens"] >= usage["completion_tokens"], "completion_tokens大于max_tokens"
assert data["metadata"]["min_tokens"] <= usage["completion_tokens"], "completion_tokens小于min_tokens"
assert usage["total_tokens"] == total_tokens, "total_tokens不等于prompt_tokens + completion_tokens"
assert data["max_tokens"] >= usage["completion_tokens"], f"completion_tokens大于max_tokens, usage: {usage}"
assert (
data["metadata"]["min_tokens"] <= usage["completion_tokens"]
), f"completion_tokens小于min_tokens, usage: {usage}"
assert (
usage["total_tokens"] == total_tokens
), f"total_tokens不等于prompt_tokens + completion_tokens, usage: {usage}"
def test_chat_usage_non_stream():
    """Check the ``usage`` block of a non-streaming chat request.

    Sends a two-message conversation with ``max_tokens`` 50 and
    ``metadata.min_tokens`` 10, then verifies that ``completion_tokens`` lies
    within those bounds and that ``total_tokens`` equals
    ``prompt_tokens + completion_tokens``.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "牛顿的三大运动定律是什么?"},
    ]
    request_body = {
        "messages": conversation,
        "max_tokens": 50,
        "stream": False,
        "metadata": {"min_tokens": 10},
    }

    request_payload = build_request_payload(TEMPLATE, request_body)
    reply = send_request(url=URL, payload=request_payload)
    usage = reply.json()["usage"]

    # Read each counter once; the sum is what total_tokens is compared against.
    completion_count = usage["completion_tokens"]
    expected_total = completion_count + usage["prompt_tokens"]

    assert request_body["max_tokens"] >= completion_count, f"completion_tokens大于max_tokens, usage: {usage}"
    assert request_body["metadata"]["min_tokens"] <= completion_count, f"completion_tokens小于min_tokens, usage: {usage}"
    assert usage["total_tokens"] == expected_total, f"total_tokens不等于prompt_tokens + completion_tokens, usage: {usage}"
# The skip reason reads "re-enable after the fix".
@pytest.mark.skip(reason="修复后打开")
def test_non_chat_usage_stream():
    """Check the ``usage`` block of a streaming ``completions`` request.

    The request asks for usage statistics through ``stream_options`` and the
    usage is read from the last chunk of the stream. ``completion_tokens``
    must lie between ``metadata.min_tokens`` (10) and ``max_tokens`` (50), and
    ``total_tokens`` must equal ``prompt_tokens + completion_tokens``.
    """
    usage_options = {"include_usage": True, "continuous_usage_stats": True}
    request_body = {
        "prompt": "牛顿的三大运动定律是什么?",
        "max_tokens": 50,
        "stream": True,
        "stream_options": usage_options,
        "metadata": {"min_tokens": 10},
    }

    # Derive the completions endpoint from the chat URL by string replacement.
    endpoint = URL.replace("chat/completions", "completions")
    request_payload = build_request_payload(TEMPLATE, request_body)
    stream_response = send_request(url=endpoint, payload=request_payload, stream=True)
    final_chunk = get_stream_chunks(stream_response)[-1]
    usage = final_chunk["usage"]

    completion_count = usage["completion_tokens"]
    expected_total = completion_count + usage["prompt_tokens"]

    assert request_body["max_tokens"] >= completion_count, f"completion_tokens大于max_tokens, usage: {usage}"
    assert request_body["metadata"]["min_tokens"] <= completion_count, f"completion_tokens小于min_tokens, usage: {usage}"
    assert usage["total_tokens"] == expected_total, f"total_tokens不等于prompt_tokens + completion_tokens, usage: {usage}"
def test_non_chat_usage_non_stream():
    """Check the ``usage`` block of a non-streaming ``completions`` request.

    Sends a plain prompt with ``max_tokens`` 50 and ``metadata.min_tokens`` 10,
    then verifies that ``completion_tokens`` lies within those bounds and that
    ``total_tokens`` equals ``prompt_tokens + completion_tokens``.
    """
    request_body = {
        "prompt": "牛顿的三大运动定律是什么?",
        "max_tokens": 50,
        "stream": False,
        "metadata": {"min_tokens": 10},
    }

    # Derive the completions endpoint from the chat URL by string replacement.
    endpoint = URL.replace("chat/completions", "completions")
    request_payload = build_request_payload(TEMPLATE, request_body)
    reply = send_request(url=endpoint, payload=request_payload)
    usage = reply.json()["usage"]

    completion_count = usage["completion_tokens"]
    expected_total = completion_count + usage["prompt_tokens"]

    assert request_body["max_tokens"] >= completion_count, f"completion_tokens大于max_tokens, usage: {usage}"
    assert request_body["metadata"]["min_tokens"] <= completion_count, f"completion_tokens小于min_tokens, usage: {usage}"
    assert usage["total_tokens"] == expected_total, f"total_tokens不等于prompt_tokens + completion_tokens, usage: {usage}"
if __name__ == "__main__":