[CI] add CI logprobs case (#3189)

* [ci] add CI case

* [ci] add CI case

* [ci] add CI case

* [ci] add CI case

---------

Co-authored-by: ZhangYulongg <1272816783@qq.com>
This commit is contained in:
plusNew001
2025-08-08 15:47:55 +08:00
committed by GitHub
parent 71267840f7
commit d0e9a70380
2 changed files with 223 additions and 0 deletions

View File

@@ -0,0 +1,161 @@
import json
from core import TEMPLATE, URL, build_request_payload, send_request
def test_unstream_with_logprobs():
    """
    Non-streaming request with logprobs enabled: verify the returned token
    probability information and usage accounting are exactly as expected.
    """
    request_body = {
        "stream": False,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }
    # Build and send the request.
    payload = build_request_payload(TEMPLATE, request_body)
    resp_json = send_request(URL, payload).json()
    print(json.dumps(resp_json, indent=2, ensure_ascii=False))

    # Validate the generated content and the first token's logprob entry.
    choice = resp_json["choices"][0]
    first_token = choice["logprobs"]["content"][0]
    assert choice["message"]["content"] == "牛顿的"
    assert first_token["token"] == "牛顿"
    assert first_token["logprob"] == -0.031025361269712448
    assert first_token["top_logprobs"][0] == {
        "token": "牛顿",
        "logprob": -0.031025361269712448,
        "bytes": [231, 137, 155, 233, 161, 191],
        "top_logprobs": None,
    }
    # Validate token usage accounting.
    assert resp_json["usage"] == {
        "prompt_tokens": 22,
        "total_tokens": 25,
        "completion_tokens": 3,
        "prompt_tokens_details": {"cached_tokens": 0},
    }
def test_unstream_without_logprobs():
    """
    Non-streaming request with logprobs disabled: verify the response carries
    no logprobs payload while content and usage stay correct.
    """
    request_body = {
        "stream": False,
        "logprobs": False,
        "top_logprobs": None,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }
    # Build and send the request.
    payload = build_request_payload(TEMPLATE, request_body)
    resp_json = send_request(URL, payload).json()
    print(json.dumps(resp_json, indent=2, ensure_ascii=False))

    # Content must match and the logprobs field must be null.
    choice = resp_json["choices"][0]
    assert choice["message"]["content"] == "牛顿的"
    assert choice["logprobs"] is None
    # Validate token usage accounting.
    assert resp_json["usage"] == {
        "prompt_tokens": 22,
        "total_tokens": 25,
        "completion_tokens": 3,
        "prompt_tokens_details": {"cached_tokens": 0},
    }
def test_stream_with_logprobs():
    """
    Streaming request with logprobs enabled: verify the first content-bearing
    chunk carries the expected token probability information.
    """
    data = {
        "stream": True,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Scan the SSE stream for the first chunk whose delta carries content.
    result_chunk = {}
    for line in response.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8").removeprefix("data: ")
        if decoded == "[DONE]":
            break
        chunk = json.loads(decoded)
        if chunk["choices"][0]["delta"].get("content"):
            result_chunk = chunk
            print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
            break

    # Fail with a clear message if the stream ended without any content chunk;
    # otherwise the asserts below would die with an opaque KeyError on {}.
    assert result_chunk, "stream ended without a content-bearing chunk"

    # Validate the first token's content and logprob entry.
    choice = result_chunk["choices"][0]
    assert choice["delta"]["content"] == "牛顿"
    first_token = choice["logprobs"]["content"][0]
    assert first_token["token"] == "牛顿"
    assert first_token["logprob"] == -0.031025361269712448
    assert first_token["top_logprobs"][0] == {
        "token": "牛顿",
        "logprob": -0.031025361269712448,
        "bytes": [231, 137, 155, 233, 161, 191],
    }
def test_stream_without_logprobs():
    """
    Streaming request with logprobs disabled: verify the first content-bearing
    chunk carries a null logprobs field.
    """
    data = {
        "stream": True,
        "logprobs": False,
        "top_logprobs": None,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Scan the SSE stream for the first chunk whose delta carries content.
    result_chunk = {}
    for line in response.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8").removeprefix("data: ")
        if decoded == "[DONE]":
            break
        chunk = json.loads(decoded)
        if chunk["choices"][0]["delta"].get("content"):
            result_chunk = chunk
            print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
            break

    # Fail with a clear message if the stream ended without any content chunk;
    # otherwise the asserts below would die with an opaque KeyError on {}.
    assert result_chunk, "stream ended without a content-bearing chunk"

    # Content must match and the logprobs field must be null.
    choice = result_chunk["choices"][0]
    assert choice["delta"]["content"] == "牛顿"
    assert choice["logprobs"] is None
if __name__ == "__main__":
    # Run every case in order when executed as a script.
    for case in (
        test_unstream_with_logprobs,
        test_unstream_without_logprobs,
        test_stream_with_logprobs,
        test_stream_without_logprobs,
    ):
        case()

View File

@@ -0,0 +1,62 @@
import json
from core import TEMPLATE, URL, build_request_payload, send_request
def test_stream_and_non_stream():
    """
    Verify that streaming and non-streaming modes of the endpoint produce
    identical final content for the same request.
    """
    # Request data for the streaming pass.
    data = {
        "stream": True,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 100,
    }
    # Build the payload and send the streaming request.
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Parse the SSE stream line by line.
    resp_chunks = []
    for line in response.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8").removeprefix("data: ")
        if decoded == "[DONE]":
            break
        resp_chunks.append(json.loads(decoded))

    # Concatenate the delta contents. `chunk.get("choices")` guards against a
    # chunk whose "choices" list is empty (e.g. a trailing usage chunk), which
    # would raise IndexError with a bare `"choices" in chunk` membership test.
    final_content = "".join(
        chunk["choices"][0]["delta"]["content"]
        for chunk in resp_chunks
        if chunk.get("choices") and "content" in chunk["choices"][0].get("delta", {})
    )
    print(final_content)

    # Re-send the same request with stream=False.
    data["stream"] = False
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)
    response_json = response.json()
    # Print the formatted full response for debugging.
    print(json.dumps(response_json, indent=2, ensure_ascii=False))

    # Both modes must produce identical output.
    assert final_content == response_json["choices"][0]["message"]["content"]
if __name__ == "__main__":
    # Allow running the comparison case directly as a script.
    test_stream_and_non_stream()