[fix] setting disable_chat_template while passing prompt_token_ids led to response error (#3228)

* [fix] setting disable_chat_template while passing prompt_token_ids led to response error
* [fix] code syntax
* [test] add test case for this bug
* [test] add test case for empty message list
* [test] fix test case for empty message list
2025-10-06 09:07:10 +08:00 · 2025-08-21 17:30:51 +08:00
parent 371fb3f853
commit e4f0b755b4
2 changed files with 23 additions and 7 deletions
--- a/fastdeploy/entrypoints/openai/protocol.py
+++ b/fastdeploy/entrypoints/openai/protocol.py
@@ -607,9 +607,10 @@ class ChatCompletionRequest(BaseModel):
            if "messages" in req_dict:
                del req_dict["messages"]
        else:
            assert len(self.messages) > 0
            # If disable_chat_template is set, then the first message in messages will be used as the prompt.
            assert (
                len(req_dict["messages"]) > 0
            ), "messages can not be an empty list, unless prompt_token_ids is passed"
            if self.disable_chat_template:
                req_dict["prompt"] = req_dict["messages"][0]["content"]
                del req_dict["messages"]
--- a/tests/ci_use/EB_Lite/test_EB_Lite_serving.py
+++ b/tests/ci_use/EB_Lite/test_EB_Lite_serving.py
@@ -707,10 +707,25 @@ def test_streaming_completion_with_prompt_token_ids(openai_client, capsys):
            assert chunk.usage.prompt_tokens == 9
-def test_non_streaming_chat_completion_disable_chat_template(openai_client, capsys):
+def test_non_streaming_chat_with_disable_chat_template(openai_client, capsys):
    """
    Test disable_chat_template option in chat functionality with the local service.
    """
    enabled_response = openai_client.chat.completions.create(
        model="default",
        messages=[],
        max_tokens=10,
        temperature=0.0,
        top_p=0,
        extra_body={
            "disable_chat_template": True,
            "prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937],
        },
        stream=False,
    )
    assert hasattr(enabled_response, "choices")
    assert len(enabled_response.choices) > 0
    enabled_response = openai_client.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": "Hello, how are you?"}],