[fix] Rename the continuation parameter generated_token_ids to completion_token_ids and revise the thinking-length validation (#4086)

* 续推参数  generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式

* 续推参数  generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式

* 续推参数  generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式

* 续推参数  generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式

* add completion_token_ids

* add logger

* fix reasoning_max_tokens ParameterError

* add unittest

* add unittest

* add unittest

* add unittest

* add unittest

* add unit test
This commit is contained in:
luukunn
2025-09-19 14:26:01 +08:00
committed by GitHub
parent 66a98b44ed
commit ee9d8a840a
6 changed files with 75 additions and 24 deletions

View File

@@ -255,6 +255,16 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
assert content1 == content2
def test_with_metadata(api_url, headers, consistent_payload):
    """
    Test that the chat endpoint accepts a ``metadata`` field that enables
    thinking mode and still returns a successful response.
    """
    # Enable thinking via the request metadata; the payload fixture supplies
    # the rest of a known-good request body.
    consistent_payload["metadata"] = {"enable_thinking": True}
    resp1 = requests.post(api_url, headers=headers, json=consistent_payload)
    # Only a successful status is asserted here; no content comparison is made.
    assert resp1.status_code == 200
# ==========================
# OpenAI Client Chat Completion Test
# ==========================
@@ -555,6 +565,46 @@ def test_chat_with_thinking(openai_client, capsys):
assert reasoning_tokens <= reasoning_max_tokens
def test_chat_with_completion_token_ids(openai_client):
    """Test completion_token_ids"""
    # Ask the server to echo token ids back and seed the continuation with a
    # known completion token id.
    extra = {
        "completion_token_ids": [94936],
        "return_token_ids": True,
        "reasoning_max_tokens": 20,
        "max_tokens": 10,
    }
    response = openai_client.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": "Hello"}],
        extra_body=extra,
        max_tokens=10,
        stream=False,
    )
    # The non-streaming response must carry at least one choice with a message.
    assert hasattr(response, "choices")
    assert len(response.choices) > 0
    first_choice = response.choices[0]
    assert hasattr(first_choice, "message")
    # prompt_token_ids must be a list that includes the seeded token id.
    message = first_choice.message
    assert hasattr(message, "prompt_token_ids")
    assert isinstance(message.prompt_token_ids, list)
    assert 94936 in message.prompt_token_ids
def test_chat_with_reasoning_max_tokens(openai_client):
    """Test that an invalid (negative) reasoning_max_tokens is rejected."""
    # Track that the error path actually ran; a silently-succeeding request
    # would otherwise make this test pass vacuously.
    assertion_executed = False
    try:
        openai_client.chat.completions.create(
            model="default",
            messages=[{"role": "user", "content": "Hello"}],
            extra_body={"completion_token_ids": [18900], "return_token_ids": True, "reasoning_max_tokens": -1},
            max_tokens=10,
            stream=False,
        )
    except openai.InternalServerError as e:
        # The server surfaces the parameter-validation failure as a 5xx error
        # whose message names the offending parameter.
        error_message = str(e)
        assertion_executed = True
        assert "reasoning_max_tokens must be greater than 1" in error_message
    assert assertion_executed, "Assertion was not executed (no exception raised)"
def test_profile_reset_block_num():
"""测试profile reset_block_num功能与baseline diff不能超过5%"""
log_file = "./log/config.log"