mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[fix]Modify follow-up push parameters and Modify the verification method for thinking length (#4086)
* 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * add completion_token_ids * add logger * fix reasoning_max_tokens ParameterError * add unittest * add unittest * add unittest * add unittest * add unittest * add unit test
This commit is contained in:
@@ -255,6 +255,16 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
|
||||
assert content1 == content2
|
||||
|
||||
|
||||
def test_with_metadata(api_url, headers, consistent_payload):
    """
    Verify the endpoint accepts a request whose metadata enables thinking.
    """
    # Enable thinking mode through the request metadata, then issue the call.
    consistent_payload["metadata"] = {"enable_thinking": True}
    first_response = requests.post(api_url, headers=headers, json=consistent_payload)
    # The server must accept the metadata-augmented payload.
    assert first_response.status_code == 200
|
||||
|
||||
|
||||
# ==========================
# OpenAI Client Chat Completion Test
# ==========================
@@ -555,6 +565,46 @@ def test_chat_with_thinking(openai_client, capsys):
|
||||
assert reasoning_tokens <= reasoning_max_tokens
|
||||
|
||||
|
||||
def test_chat_with_completion_token_ids(openai_client):
    """Test completion_token_ids"""
    # Send a continuation request: the provided completion token ids should be
    # folded into the prompt, and token ids must be echoed back in the response.
    extra = {
        "completion_token_ids": [94936],
        "return_token_ids": True,
        "reasoning_max_tokens": 20,
        "max_tokens": 10,
    }
    resp = openai_client.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": "Hello"}],
        extra_body=extra,
        max_tokens=10,
        stream=False,
    )
    # Structural checks on the response object.
    assert hasattr(resp, "choices")
    assert len(resp.choices) > 0
    first_choice = resp.choices[0]
    assert hasattr(first_choice, "message")
    assert hasattr(first_choice.message, "prompt_token_ids")
    prompt_ids = first_choice.message.prompt_token_ids
    assert isinstance(prompt_ids, list)
    # The continuation token must appear among the prompt token ids.
    assert 94936 in prompt_ids
|
||||
|
||||
|
||||
def test_chat_with_reasoning_max_tokens(openai_client):
    """Verify the server rejects an invalid reasoning_max_tokens value (< 1)."""
    # Flag guards against a false pass: if the request unexpectedly succeeds,
    # the except branch never runs and the final assert fails loudly.
    assertion_executed = False
    try:
        openai_client.chat.completions.create(
            model="default",
            messages=[{"role": "user", "content": "Hello"}],
            # reasoning_max_tokens=-1 is out of range and should trigger a server-side error.
            extra_body={"completion_token_ids": [18900], "return_token_ids": True, "reasoning_max_tokens": -1},
            max_tokens=10,
            stream=False,
        )
    except openai.InternalServerError as e:
        error_message = str(e)
        assertion_executed = True
        # The server must surface the specific validation message.
        assert "reasoning_max_tokens must be greater than 1" in error_message
    assert assertion_executed, "Assertion was not executed (no exception raised)"
|
||||
|
||||
|
||||
def test_profile_reset_block_num():
|
||||
"""测试profile reset_block_num功能,与baseline diff不能超过5%"""
|
||||
log_file = "./log/config.log"
|
||||
|
||||
Reference in New Issue
Block a user