diff --git a/test/ce/server/test_logprobs.py b/test/ce/server/test_logprobs.py
new file mode 100644
index 000000000..4f3214b55
--- /dev/null
+++ b/test/ce/server/test_logprobs.py
@@ -0,0 +1,180 @@
+import json
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def _first_content_chunk(response):
+    """
+    Return the first streamed chunk whose delta has non-empty content.
+
+    Iterates ``response.iter_lines()``, skips empty lines, strips the
+    ``data: `` prefix and stops at the ``[DONE]`` sentinel. The chunk that
+    is returned is also pretty-printed. Returns None when the stream ends
+    without a content chunk.
+
+    The response is closed on exit because the loop normally stops before
+    the stream is exhausted.
+    NOTE(review): assumes ``send_request`` returns a requests-style response
+    that provides ``close()`` -- confirm against ``core``.
+    """
+    try:
+        for line in response.iter_lines():
+            if not line:
+                continue
+            decoded = line.decode("utf-8").removeprefix("data: ")
+            if decoded == "[DONE]":
+                break
+
+            chunk = json.loads(decoded)
+            if chunk["choices"][0]["delta"].get("content"):
+                print(json.dumps(chunk, indent=2, ensure_ascii=False))
+                return chunk
+        return None
+    finally:
+        response.close()
+
+
+def test_unstream_with_logprobs():
+    """
+    Check the token log-probabilities of a non-streaming response.
+
+    NOTE(review): the request does not set ``logprobs`` itself; presumably
+    TEMPLATE enables it -- confirm against ``core``.
+    """
+    data = {
+        "stream": False,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
+        ],
+        "max_tokens": 3,
+    }
+
+    # Build the request and send it.
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+    resp_json = response.json()
+    print(json.dumps(resp_json, indent=2, ensure_ascii=False))
+
+    # Check the generated text and the log-probability details.
+    choice = resp_json["choices"][0]
+    assert choice["message"]["content"] == "牛顿的"
+    first_token = choice["logprobs"]["content"][0]
+    assert first_token["token"] == "牛顿"
+    assert first_token["logprob"] == -0.031025361269712448
+    assert first_token["top_logprobs"][0] == {
+        "token": "牛顿",
+        "logprob": -0.031025361269712448,
+        "bytes": [231, 137, 155, 233, 161, 191],
+        "top_logprobs": None,
+    }
+    assert resp_json["usage"] == {
+        "prompt_tokens": 22,
+        "total_tokens": 25,
+        "completion_tokens": 3,
+        "prompt_tokens_details": {"cached_tokens": 0},
+    }
+
+
+def test_unstream_without_logprobs():
+    """
+    Check that a non-streaming response has no logprobs when disabled.
+    """
+    data = {
+        "stream": False,
+        "logprobs": False,
+        "top_logprobs": None,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
+        ],
+        "max_tokens": 3,
+    }
+
+    # Build the request and send it.
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+    resp_json = response.json()
+    print(json.dumps(resp_json, indent=2, ensure_ascii=False))
+
+    # Check the generated text and that logprobs is null.
+    choice = resp_json["choices"][0]
+    assert choice["message"]["content"] == "牛顿的"
+    assert choice["logprobs"] is None
+    assert resp_json["usage"] == {
+        "prompt_tokens": 22,
+        "total_tokens": 25,
+        "completion_tokens": 3,
+        "prompt_tokens_details": {"cached_tokens": 0},
+    }
+
+
+def test_stream_with_logprobs():
+    """
+    Check the log-probabilities on the first streamed content chunk.
+
+    NOTE(review): the request does not set ``logprobs`` itself; presumably
+    TEMPLATE enables it -- confirm against ``core``.
+    """
+    data = {
+        "stream": True,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
+        ],
+        "max_tokens": 3,
+    }
+
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+
+    # Fail with a clear message instead of a KeyError on an empty dict.
+    result_chunk = _first_content_chunk(response)
+    assert result_chunk is not None, "stream ended without a content chunk"
+
+    # Check the log-probability fields.
+    choice = result_chunk["choices"][0]
+    assert choice["delta"]["content"] == "牛顿"
+    first_token = choice["logprobs"]["content"][0]
+    assert first_token["token"] == "牛顿"
+    assert first_token["logprob"] == -0.031025361269712448
+    assert first_token["top_logprobs"][0] == {
+        "token": "牛顿",
+        "logprob": -0.031025361269712448,
+        "bytes": [231, 137, 155, 233, 161, 191],
+    }
+
+
+def test_stream_without_logprobs():
+    """
+    Check that the first streamed content chunk has no logprobs when disabled.
+    """
+    data = {
+        "stream": True,
+        "logprobs": False,
+        "top_logprobs": None,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
+        ],
+        "max_tokens": 3,
+    }
+
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+
+    # Fail with a clear message instead of a KeyError on an empty dict.
+    result_chunk = _first_content_chunk(response)
+    assert result_chunk is not None, "stream ended without a content chunk"
+
+    # Check that logprobs is null.
+    choice = result_chunk["choices"][0]
+    assert choice["delta"]["content"] == "牛顿"
+    assert choice["logprobs"] is None
+
+
+if __name__ == "__main__":
+    test_unstream_with_logprobs()
+    test_unstream_without_logprobs()
+    test_stream_with_logprobs()
+    test_stream_without_logprobs()
diff --git a/test/ce/server/test_stream.py b/test/ce/server/test_stream.py
new file mode 100644
index 000000000..4f0662421
--- /dev/null
+++ b/test/ce/server/test_stream.py
@@ -0,0 +1,68 @@
+import json
+
+from core import TEMPLATE, URL, build_request_payload, send_request
+
+
+def test_stream_and_non_stream():
+    """
+    Check that streaming and non-streaming requests return the same content.
+    """
+
+    # Request data for the stream=True call.
+    data = {
+        "stream": True,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "牛顿的三大运动定律是什么?"},
+        ],
+        "max_tokens": 100,
+    }
+
+    # Build the payload and send the streaming request.
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+
+    # Parse the streamed response line by line.
+    resp_chunks = []
+    for line in response.iter_lines():
+        if not line:
+            continue
+
+        decoded = line.decode("utf-8")
+        if decoded.startswith("data: "):
+            decoded = decoded[len("data: ") :]
+
+        if decoded == "[DONE]":
+            break
+
+        resp_chunks.append(json.loads(decoded))
+
+    # Concatenate the content deltas into the final model output. Chunks
+    # with no choices (missing key or empty list) and deltas whose content
+    # is missing or null are skipped rather than raising.
+    content_parts = []
+    for chunk in resp_chunks:
+        choices = chunk.get("choices")
+        if not choices:
+            continue
+        content = (choices[0].get("delta") or {}).get("content")
+        if content:
+            content_parts.append(content)
+    final_content = "".join(content_parts)
+    print(final_content)
+
+    # Switch to stream=False and send a regular request.
+    data["stream"] = False
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload)
+
+    # Print the full response, pretty-printed.
+    response_json = response.json()
+    print(json.dumps(response_json, indent=2, ensure_ascii=False))
+
+    # Both modes must produce the same content.
+    assert final_content == response_json["choices"][0]["message"]["content"]
+
+
+if __name__ == "__main__":
+    test_stream_and_non_stream()