【CI case】echo, finish_reason, text_after_process and raw_prediction checks (#3630)

* Add CI case for min tokens and max tokens

* 【CI case】include total_tokens in the last packet of the completion interface's stream output

* Add echo, finish_reason, text_after_process and raw_prediction checks

---------

Co-authored-by: xujing43 <xujing43@baidu.com>
Author: xjkmfa (committed by GitHub)
Date: 2025-08-27 15:21:16 +08:00
Parent: 5ad8721506
Commit: afb9f327ef
4 changed files with 386 additions and 5 deletions
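
All of the new tests consume the server's streaming output the same way: read SSE lines, skip non-data lines, stop at the "data: [DONE]" sentinel, and parse each "data: " chunk as JSON. A minimal sketch of that shared pattern (the helper name is illustrative, not part of the commit):

import json

def iter_stream_packets(resp):
    # Yield each parsed JSON packet from an OpenAI-style SSE response.
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        if line.strip() == "" or not line.startswith("data: "):
            continue
        yield json.loads(line[len("data: "):])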


@@ -183,7 +183,7 @@ jobs:
export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
export TEMPLATE=TOKEN_LOGPROB
TEST_EXIT_CODE=0
-python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py || TEST_EXIT_CODE=1
+python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py test_completions.py test_return_token_ids.py || TEST_EXIT_CODE=1
curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
  -H "Content-Type: application/json" \
  -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--early-stop-config\": \"{\\\"enable_early_stop\\\":true, \\\"window_size\\\":6, \\\"threshold\\\":0.93}\"}"


@@ -11,8 +11,7 @@ import json
from core import TEMPLATE, URL, build_request_payload, send_request

-URL = URL.replace("/v1/chat/completions", "/v1/completions")
+COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")

def test_completion_total_tokens():
    data = {
@@ -22,7 +21,7 @@ def test_completion_total_tokens():
    }
    payload = build_request_payload(TEMPLATE, data)
-    resp = send_request(URL, payload, stream=True)
+    resp = send_request(COMPLETIONS_URL, payload, stream=True)
    last_data = None
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
@@ -35,3 +34,202 @@ def test_completion_total_tokens():
    total_tokens = usage["completion_tokens"] + usage["prompt_tokens"]
    assert "total_tokens" in usage, "total_tokens is missing"
    assert usage["total_tokens"] == total_tokens, "total_tokens count is incorrect"
def test_completion_echo_stream_one_prompt_rti():
    """
    Check the echo parameter in a streaming response with a single prompt
    and return_token_ids enabled.
    """
    data = {
        "prompt": "水果的营养价值是如何的?",
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "echo": True,
        "max_tokens": 2,
        "return_token_ids": True,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload, stream=True)
    counter = 0
    second_data = None
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        if line.strip() == "" or not line.startswith("data: "):
            continue
        line = line[len("data: "):]
        stream_data = json.loads(line)
        counter += 1
        if counter == 2:  # with return_token_ids, the echoed prompt arrives in the second packet
            second_data = stream_data
            break
    text = second_data["choices"][0]["text"]
    assert data["prompt"] in text, "the prompt is not echoed back correctly"
    position = text.find(data["prompt"])
    assert position == 0, "the echoed prompt is not at the start of the text"
def test_completion_echo_stream_one_prompt():
    """
    Check the echo parameter in a streaming response with a single prompt
    (without return_token_ids).
    """
    data = {
        "prompt": "水果的营养价值是如何的?",
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "echo": True,
        "max_tokens": 2,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload, stream=True)
    counter = 0
    first_data = None
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        if line.strip() == "" or not line.startswith("data: "):
            continue
        line = line[len("data: "):]
        stream_data = json.loads(line)
        counter += 1
        if counter == 1:  # without return_token_ids, the echoed prompt arrives in the first packet
            first_data = stream_data
            break
    text = first_data["choices"][0]["text"]
    assert data["prompt"] in text, "the prompt is not echoed back correctly"
    position = text.find(data["prompt"])
    assert position == 0, "the echoed prompt is not at the start of the text"
def test_completion_echo_stream_more_prompt():
    """
    Check the echo parameter in a streaming response with multiple prompts.
    """
    data = {
        "prompt": ["水果的营养价值是如何的?", "水的化学式是什么?"],
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "echo": True,
        "max_tokens": 2,
        "return_token_ids": True,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload, stream=True)
    # track the second packet and a packet counter per choice index
    second_data_by_index = {0: None, 1: None}
    packet_count_by_index = {0: 0, 1: 0}
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        if line.strip() == "" or not line.startswith("data: "):
            continue
        line = line[len("data: "):]
        stream_data = json.loads(line)
        for choice in stream_data.get("choices", []):
            index = choice.get("index")
            if index in packet_count_by_index:
                packet_count_by_index[index] += 1
                if packet_count_by_index[index] == 2:
                    second_data_by_index[index] = choice
        if all(value is not None for value in second_data_by_index.values()):
            break  # both second packets collected, no need to read further
    text_0 = second_data_by_index[0]["text"]
    text_1 = second_data_by_index[1]["text"]
    assert data["prompt"][0] in text_0, "prompt[0] is not echoed back correctly"
    assert data["prompt"][1] in text_1, "prompt[1] is not echoed back correctly"
    position_0 = text_0.find(data["prompt"][0])
    assert position_0 == 0, "the echoed prompt[0] is not at the start of the text"
    position_1 = text_1.find(data["prompt"][1])
    assert position_1 == 0, "the echoed prompt[1] is not at the start of the text"
def test_completion_echo_one_prompt():
    """
    Check the echo parameter in a non-streaming response with a single prompt.
    """
    data = {
        "stream": False,
        "prompt": "水果的营养价值是如何的?",
        "echo": True,
        "max_tokens": 100,
    }
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(COMPLETIONS_URL, payload).json()
    text = response["choices"][0]["text"]
    assert data["prompt"] in text, "the prompt is not echoed back correctly"
    position = text.find(data["prompt"])
    assert position == 0, "the echoed prompt is not at the start of the text"
def test_completion_echo_more_prompt():
    """
    Check the echo parameter in a non-streaming response with multiple prompts.
    """
    data = {
        "stream": False,
        "prompt": ["水果的营养价值是如何的?", "水的化学式是什么?"],
        "echo": True,
        "max_tokens": 100,
    }
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(COMPLETIONS_URL, payload).json()
    text_0 = response["choices"][0]["text"]
    text_1 = response["choices"][1]["text"]
    assert data["prompt"][0] in text_0, "prompt[0] is not echoed back correctly"
    assert data["prompt"][1] in text_1, "prompt[1] is not echoed back correctly"
    position_0 = text_0.find(data["prompt"][0])
    assert position_0 == 0, "the echoed prompt[0] is not at the start of the text"
    position_1 = text_1.find(data["prompt"][1])
    assert position_1 == 0, "the echoed prompt[1] is not at the start of the text"
def test_completion_finish_length():
    """
    Non-streaming response: finish_reason should be "length" when the
    output is truncated by max_tokens.
    """
    data = {
        "stream": False,
        "prompt": "水果的营养价值是如何的?",
        "max_tokens": 10,
    }
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(COMPLETIONS_URL, payload).json()
    finish_reason = response["choices"][0]["finish_reason"]
    assert finish_reason == "length", "finish_reason is not length when max_tokens is reached"
def test_completion_finish_stop():
    """
    Non-streaming response: finish_reason should be "stop" when the model
    finishes naturally.
    """
    data = {
        "stream": False,
        "prompt": "简短的回答我:苹果是水果吗?",
    }
    payload = build_request_payload(TEMPLATE, data)
    response = send_request(COMPLETIONS_URL, payload).json()
    finish_reason = response["choices"][0]["finish_reason"]
    assert finish_reason == "stop", "finish_reason is not stop when the reply completes without truncation"


@@ -0,0 +1,183 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @author xujing43
# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
"""
Checks for /v1/completions parameters
"""
import json

from core import (
    TEMPLATE,
    URL,
    build_request_payload,
    send_request,
)

COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")
def test_completion_stream_text_after_process_raw_prediction():
    """
    /v1/completions with stream=True:
    verify the returned "text_after_process" and "raw_prediction" fields.
    """
    data = {
        "prompt": "你是谁",
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "max_tokens": 50,
        "return_token_ids": True,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload, stream=True)
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        if line.strip() == "" or not line.startswith("data: "):
            continue
        line = line[len("data: "):]
        response_data = json.loads(line)
        choice = response_data["choices"][0]
        if "prompt_token_ids" in choice and choice["prompt_token_ids"] is not None:
            # the first packet carries the processed prompt
            text_after_process = choice["text_after_process"]
            assert data["prompt"] in text_after_process, "text_after_process value is incorrect"
        else:
            raw_prediction = choice["raw_prediction"]
            reasoning_content = choice["reasoning_content"]
            text = choice["text"]
            # either reasoning content is present, or the text must appear in raw_prediction
            assert reasoning_content or (text in raw_prediction), "raw_prediction value is incorrect"
        if "finish_reason" in line.strip():
            break
def test_completion_text_after_process_raw_prediction_return_token_ids():
    """
    /v1/completions, non-streaming, with return_token_ids:
    verify the returned "text_after_process" and "raw_prediction" fields.
    """
    data = {
        "stream": False,
        "prompt": "你是谁",
        "max_tokens": 50,
        "return_token_ids": True,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload).json()
    text_after_process = resp["choices"][0]["text_after_process"]
    assert data["prompt"] in text_after_process, "text_after_process value is incorrect"
    raw_prediction = resp["choices"][0]["raw_prediction"]
    reasoning_content = resp["choices"][0]["reasoning_content"]
    text = resp["choices"][0]["text"]
    # either reasoning content is present, or the text must appear in raw_prediction
    assert reasoning_content or (text in raw_prediction), "raw_prediction value is incorrect"
def test_completion_text_after_process_raw_prediction():
    """
    /v1/completions, non-streaming, without the return_token_ids parameter:
    "text_after_process" and "raw_prediction" must be null.
    """
    data = {
        "stream": False,
        "prompt": "你是谁",
        "max_tokens": 50,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload).json()
    text_after_process = resp["choices"][0]["text_after_process"]
    assert text_after_process is None, "text_after_process should be null without return_token_ids"
    raw_prediction = resp["choices"][0]["raw_prediction"]
    assert raw_prediction is None, "raw_prediction should be null without return_token_ids"
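
# Illustrative non-streaming choice shapes covered by the two tests above
# (values are assumptions; only the field names come from the assertions):
# with return_token_ids:    {"text": "...", "text_after_process": "<processed prompt>",
#                            "raw_prediction": "<raw model output>", "reasoning_content": null}
# without return_token_ids: {"text": "...", "text_after_process": null, "raw_prediction": null}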
def test_stream_text_after_process_raw_prediction():
    """
    /v1/chat/completions with stream=True:
    verify the returned "text_after_process" and "raw_prediction" fields.
    """
    data = {
        "messages": [{"role": "user", "content": "你是谁"}],
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "max_tokens": 50,
        "return_token_ids": True,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(URL, payload, stream=True)
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        if line.strip() == "" or not line.startswith("data: "):
            continue
        line = line[len("data: "):]
        response_data = json.loads(line)
        choice = response_data["choices"][0]
        if "prompt_token_ids" in choice["delta"] and choice["delta"]["prompt_token_ids"] is not None:
            # the first packet carries the processed prompt
            text_after_process = choice["delta"]["text_after_process"]
            assert data["messages"][0]["content"] in text_after_process, "text_after_process value is incorrect"
        else:
            raw_prediction = choice["delta"]["raw_prediction"]
            reasoning_content = choice["delta"]["reasoning_content"]
            content = choice["delta"]["content"]
            # either reasoning content is present, or the content must appear in raw_prediction
            assert reasoning_content or (content in raw_prediction), "raw_prediction value is incorrect"
        if "finish_reason" in line.strip():
            break
def test_text_after_process_raw_prediction_return_token_ids():
    """
    /v1/chat/completions, non-streaming, with return_token_ids:
    verify the returned "text_after_process" and "raw_prediction" fields.
    """
    data = {
        "stream": False,
        "messages": [{"role": "user", "content": "你是谁"}],
        "max_tokens": 50,
        "return_token_ids": True,
        "logprobs": False,
        "top_logprobs": None,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(URL, payload).json()
    text_after_process = resp["choices"][0]["message"]["text_after_process"]
    assert data["messages"][0]["content"] in text_after_process, "text_after_process value is incorrect"
    raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
    reasoning_content = resp["choices"][0]["message"]["reasoning_content"]
    text = resp["choices"][0]["message"]["content"]
    # either reasoning content is present, or the content must appear in raw_prediction
    assert reasoning_content or (text in raw_prediction), "raw_prediction value is incorrect"
def test_text_after_process_raw_prediction():
    """
    /v1/chat/completions, non-streaming, without the return_token_ids parameter:
    "text_after_process" and "raw_prediction" must be null.
    """
    data = {
        "stream": False,
        "messages": [{"role": "user", "content": "你是谁"}],
        "max_tokens": 50,
        "logprobs": False,
        "top_logprobs": None,
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(URL, payload).json()
    text_after_process = resp["choices"][0]["message"]["text_after_process"]
    assert text_after_process is None, "text_after_process should be null without return_token_ids"
    raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
    assert raw_prediction is None, "raw_prediction should be null without return_token_ids"
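
The same contract can be checked by hand for the chat endpoint with the helpers above; a sketch, assuming the server behind core's URL is running:

if __name__ == "__main__":
    demo = {
        "messages": [{"role": "user", "content": "你是谁"}],
        "max_tokens": 50,
        "return_token_ids": True,
    }
    msg = send_request(URL, build_request_payload(TEMPLATE, demo)).json()["choices"][0]["message"]
    # expect non-null values here, and null when return_token_ids is omitted
    print(msg["text_after_process"], msg["raw_prediction"])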


@@ -30,7 +30,7 @@ if ! [[ $(python -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1$2}') -ge 36
fi
# Exclude any files under the 'test/ce/server/' directory from code style checks.
-diff_files=$(git diff --name-only --diff-filter=ACMR ${BRANCH} | grep -v '^test/ce/server/')
+diff_files=$(git diff --name-only --diff-filter=ACMR ${BRANCH} | grep -v '^tests/ce/server/')
num_diff_files=$(echo "$diff_files" | wc -l)
echo -e "diff files between pr and ${BRANCH}:\n${diff_files}"