Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-30 19:36:42 +08:00)
[fix] setting disable_chat_template while passing prompt_token_ids led to response error (#3511)
* [fix] setting disable_chat_template while passing prompt_token_ids led to response error
* [fix] code syntax
* [test] add test case for this bug
* [test] add test case for empty message list
* [test] fix test case for empty message list
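The tests added below exercise exactly this combination. As a quick orientation, a minimal client-side sketch of the request that used to fail looks roughly like the following; the base_url, api_key, and port are hypothetical placeholders, while model "default", the extra_body fields, and the token ids mirror the new test:

from openai import OpenAI

# Hypothetical local endpoint; the test suite uses an `openai_client` fixture instead.
client = OpenAI(base_url="http://localhost:8188/v1", api_key="EMPTY")

# With disable_chat_template the server skips chat-template rendering and,
# because prompt_token_ids is supplied, infers directly from those token ids,
# so messages may be left empty.
response = client.chat.completions.create(
    model="default",
    messages=[],  # allowed to be empty because prompt_token_ids is passed
    max_tokens=10,
    extra_body={
        "disable_chat_template": True,
        "prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937],
    },
)
print(response.choices[0].message.content)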
@@ -563,12 +563,13 @@ class ChatCompletionRequest(BaseModel):
             if "messages" in req_dict:
                 del req_dict["messages"]
         else:
-            assert len(self.messages) > 0
-
-            # If disable_chat_template is set, then the first message in messages will be used as the prompt.
-            if self.disable_chat_template:
-                req_dict["prompt"] = req_dict["messages"][0]["content"]
-                del req_dict["messages"]
+            # If disable_chat_template is set, then the first message in messages will be used as the prompt.
+            assert (
+                len(req_dict["messages"]) > 0
+            ), "messages can not be an empty list, unless prompt_token_ids is passed"
+            if self.disable_chat_template:
+                req_dict["prompt"] = req_dict["messages"][0]["content"]
+                del req_dict["messages"]
 
         guided_json_object = None
         if self.response_format is not None:
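Pulled out of the hunk above, the tightened guard reads as a small standalone check; validate_messages is a hypothetical name used only for illustration, since in the commit the assertion lives inside ChatCompletionRequest:

def validate_messages(req_dict: dict) -> None:
    # When prompt_token_ids is present, messages may be missing or empty.
    if "prompt_token_ids" in req_dict:
        return
    # Otherwise an empty messages list is rejected with an explicit error
    # message rather than a bare AssertionError.
    assert (
        len(req_dict["messages"]) > 0
    ), "messages can not be an empty list, unless prompt_token_ids is passed"


validate_messages({"messages": [], "prompt_token_ids": [1, 2, 3]})  # passes
validate_messages({"messages": []})  # raises AssertionError with the message above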
@@ -706,10 +706,25 @@ def test_streaming_completion_with_prompt_token_ids(openai_client, capsys):
             assert chunk.usage.prompt_tokens == 9
 
 
-def test_non_streaming_chat_completion_disable_chat_template(openai_client, capsys):
+def test_non_streaming_chat_with_disable_chat_template(openai_client, capsys):
     """
     Test disable_chat_template option in chat functionality with the local service.
     """
+    enabled_response = openai_client.chat.completions.create(
+        model="default",
+        messages=[],
+        max_tokens=10,
+        temperature=0.0,
+        top_p=0,
+        extra_body={
+            "disable_chat_template": True,
+            "prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937],
+        },
+        stream=False,
+    )
+    assert hasattr(enabled_response, "choices")
+    assert len(enabled_response.choices) > 0
+
     enabled_response = openai_client.chat.completions.create(
         model="default",
         messages=[{"role": "user", "content": "Hello, how are you?"}],
@@ -939,3 +954,10 @@ def test_streaming_completion_with_bad_words(openai_client, capsys):
         assert hasattr(chunk.choices[0], "text")
         output_1.append(chunk.choices[0].text)
     assert output_0 not in output_1
+
+
+def test_chat_with_empty_message_list(api_url, headers):
+    for is_stream in [True, False]:
+        payload = {"messages": [], "stream": is_stream}
+        response = requests.post(api_url, headers=headers, json=payload)
+        assert response.status_code == 400
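The new test posts raw JSON to the chat endpoint through the api_url and headers fixtures. A hedged counterpart for the case the assertion message still allows, assuming a hypothetical local endpoint and plain JSON headers, could look like:

import requests

# Hypothetical endpoint and headers; the test suite provides these as fixtures.
api_url = "http://localhost:8188/v1/chat/completions"
headers = {"Content-Type": "application/json"}

# Empty messages are accepted as long as prompt_token_ids is passed;
# without prompt_token_ids the same request is rejected with HTTP 400.
payload = {
    "messages": [],
    "disable_chat_template": True,
    "prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937],
    "stream": False,
}
response = requests.post(api_url, headers=headers, json=payload)
print(response.status_code)  # expected 200 with this commit applied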