mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-23 00:29:35 +08:00
[CI] add CI logprobs case (#3189)
* [ci] add CI case * [ci] add CI case * [ci] add CI case * [ci] add CI case --------- Co-authored-by: ZhangYulongg <1272816783@qq.com>
This commit is contained in:
161
test/ce/server/test_logprobs.py
Normal file
161
test/ce/server/test_logprobs.py
Normal file
@@ -0,0 +1,161 @@
|
||||
import json
|
||||
|
||||
from core import TEMPLATE, URL, build_request_payload, send_request
|
||||
|
||||
|
||||
def test_unstream_with_logprobs():
    """
    Check the token probability details of a non-streaming response.

    Sends a 3-token chat completion request and compares the generated text,
    the first token's logprob entry and the usage counters against fixed
    reference values.

    NOTE(review): the request built here does not set ``logprobs`` itself;
    presumably TEMPLATE enables it -- confirm against ``core``.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "牛顿的三大运动定律是什么?"},
    ]
    data = {"stream": False, "messages": messages, "max_tokens": 3}

    # Build the request and send it.
    response = send_request(URL, build_request_payload(TEMPLATE, data))
    print(json.dumps(response.json(), indent=2, ensure_ascii=False))
    resp_json = response.json()

    # Reference values for the first generated token.
    expected_logprob = -0.031025361269712448
    expected_top_entry = {
        "token": "牛顿",
        "logprob": expected_logprob,
        "bytes": [231, 137, 155, 233, 161, 191],
        "top_logprobs": None,
    }
    expected_usage = {
        "prompt_tokens": 22,
        "total_tokens": 25,
        "completion_tokens": 3,
        "prompt_tokens_details": {"cached_tokens": 0},
    }

    # Verify the generated text and the probability information.
    choice = resp_json["choices"][0]
    assert choice["message"]["content"] == "牛顿的"

    first_token = choice["logprobs"]["content"][0]
    assert first_token["token"] == "牛顿"
    assert first_token["logprob"] == expected_logprob
    assert first_token["top_logprobs"][0] == expected_top_entry
    assert resp_json["usage"] == expected_usage
|
||||
|
||||
|
||||
def test_unstream_without_logprobs():
    """
    Check a non-streaming response when logprobs are explicitly disabled.

    The request sets ``logprobs`` to False and ``top_logprobs`` to None; the
    first choice must then report ``logprobs`` as None, while the generated
    text and the usage counters still match the fixed reference values.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "牛顿的三大运动定律是什么?"},
    ]
    data = {
        "stream": False,
        "logprobs": False,
        "top_logprobs": None,
        "messages": messages,
        "max_tokens": 3,
    }

    # Build the request and send it.
    response = send_request(URL, build_request_payload(TEMPLATE, data))
    print(json.dumps(response.json(), indent=2, ensure_ascii=False))
    resp_json = response.json()

    expected_usage = {
        "prompt_tokens": 22,
        "total_tokens": 25,
        "completion_tokens": 3,
        "prompt_tokens_details": {"cached_tokens": 0},
    }

    # Verify the generated text and that no logprobs are reported.
    choice = resp_json["choices"][0]
    assert choice["message"]["content"] == "牛顿的"
    assert choice["logprobs"] is None
    assert resp_json["usage"] == expected_usage
|
||||
|
||||
|
||||
def test_stream_with_logprobs():
    """
    Check the logprob details of the first generated token in a stream.

    Reads the streamed lines until the first chunk whose delta carries
    non-empty content, then compares that chunk's token, logprob and first
    top_logprobs entry against fixed reference values.

    NOTE(review): the request built here does not set ``logprobs`` itself;
    presumably TEMPLATE enables it -- confirm against ``core``.
    """
    data = {
        "stream": True,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }

    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Find the first streamed chunk that carries content.
    result_chunk = {}
    try:
        for line in response.iter_lines():
            if not line:
                continue
            decoded = line.decode("utf-8").removeprefix("data: ")
            if decoded == "[DONE]":
                break

            chunk = json.loads(decoded)
            content = chunk["choices"][0]["delta"].get("content")
            if content:
                result_chunk = chunk
                print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
                break
    finally:
        # We stop reading early, so release the connection explicitly.
        # Assumes send_request returns a requests-style response -- TODO confirm.
        response.close()

    # Fail with a readable message instead of a KeyError on the empty dict.
    assert result_chunk, "stream ended without any chunk carrying content"

    # Verify the probability fields.
    choice = result_chunk["choices"][0]
    assert choice["delta"]["content"] == "牛顿"

    first_token = choice["logprobs"]["content"][0]
    assert first_token["token"] == "牛顿"
    assert first_token["logprob"] == -0.031025361269712448
    assert first_token["top_logprobs"][0] == {
        "token": "牛顿",
        "logprob": -0.031025361269712448,
        "bytes": [231, 137, 155, 233, 161, 191],
    }
|
||||
|
||||
|
||||
def test_stream_without_logprobs():
    """
    Check a streaming response when logprobs are explicitly disabled.

    The request sets ``logprobs`` to False and ``top_logprobs`` to None. The
    first chunk whose delta carries non-empty content must then report
    ``logprobs`` as None for its first choice.
    """
    data = {
        "stream": True,
        "logprobs": False,
        "top_logprobs": None,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 3,
    }

    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Find the first streamed chunk that carries content.
    result_chunk = {}
    try:
        for line in response.iter_lines():
            if not line:
                continue
            decoded = line.decode("utf-8").removeprefix("data: ")
            if decoded == "[DONE]":
                break

            chunk = json.loads(decoded)
            content = chunk["choices"][0]["delta"].get("content")
            if content:
                result_chunk = chunk
                print(json.dumps(result_chunk, indent=2, ensure_ascii=False))
                break
    finally:
        # We stop reading early, so release the connection explicitly.
        # Assumes send_request returns a requests-style response -- TODO confirm.
        response.close()

    # Fail with a readable message instead of a KeyError on the empty dict.
    assert result_chunk, "stream ended without any chunk carrying content"

    # Verify the content and that no logprobs are reported.
    choice = result_chunk["choices"][0]
    assert choice["delta"]["content"] == "牛顿"
    assert choice["logprobs"] is None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every case in file order when executed directly as a script.
    for case in (
        test_unstream_with_logprobs,
        test_unstream_without_logprobs,
        test_stream_with_logprobs,
        test_stream_without_logprobs,
    ):
        case()
|
62
test/ce/server/test_stream.py
Normal file
62
test/ce/server/test_stream.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import json
|
||||
|
||||
from core import TEMPLATE, URL, build_request_payload, send_request
|
||||
|
||||
|
||||
def test_stream_and_non_stream():
    """
    Check that stream and non-stream modes return the same content.

    Sends the same chat request twice, first with ``stream`` True and then
    with ``stream`` False, and asserts that the concatenated streamed deltas
    equal the message content of the non-streamed response.
    """
    # Request data for the streaming call.
    data = {
        "stream": True,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
        ],
        "max_tokens": 100,
    }

    # Build the payload and send the streaming request.
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Parse the streamed response line by line.
    resp_chunks = []
    try:
        for line in response.iter_lines():
            if not line:
                continue

            decoded = line.decode("utf-8")
            if decoded.startswith("data: "):
                decoded = decoded[len("data: ") :]

            if decoded == "[DONE]":
                break

            resp_chunks.append(json.loads(decoded))
    finally:
        # Reading stops at the [DONE] marker; release the connection explicitly.
        # Assumes send_request returns a requests-style response -- TODO confirm.
        response.close()

    # Concatenate the streamed content. Chunks with no choices (or an empty
    # choices list) and deltas whose content is missing or None are skipped,
    # so they cannot raise IndexError or break the join.
    pieces = []
    for chunk in resp_chunks:
        choices = chunk.get("choices")
        if not choices:
            continue
        content = (choices[0].get("delta") or {}).get("content")
        if content:
            pieces.append(content)
    final_content = "".join(pieces)
    print(final_content)

    # Switch to stream=False and send a regular request.
    data["stream"] = False
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(URL, payload)

    # Parse the body once, then print it pretty-formatted.
    response_json = response.json()
    print(json.dumps(response_json, indent=2, ensure_ascii=False))

    # Both modes must produce the same output.
    assert final_content == response_json["choices"][0]["message"]["content"]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this check directly as a script.
    test_stream_and_non_stream()
|
Reference in New Issue
Block a user