From 82bd7e5db4da1e2c8c4e87e3377b7708312064d6 Mon Sep 17 00:00:00 2001
From: kxz2002 <115912648+kxz2002@users.noreply.github.com>
Date: Fri, 31 Oct 2025 10:42:19 +0800
Subject: [PATCH] [BugFix] Fix finish reason in _create_chat_completion_choice
 (#4582)

* fix n_param in _create_chat_completion_choice
* fix unit test
* fix final_res
* modify unit tests
---
 fastdeploy/entrypoints/openai/serving_chat.py         | 4 ++--
 tests/entrypoints/openai/test_max_streaming_tokens.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index b18fc5102..cf11ba8ff 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -621,7 +621,7 @@ class OpenAIServingChat:
 
         if output is not None and output.get("metrics") and output["metrics"].get("request_start_time"):
             work_process_metrics.e2e_request_latency.observe(
-                time.time() - output.get("metrics").get("request_start_time")
+                time.time() - data.get("metrics").get("request_start_time")
             )
         message = ChatMessage(
             role="assistant",
@@ -655,7 +655,7 @@ class OpenAIServingChat:
                 finish_reason = "tool_calls"
             else:
                 finish_reason = "length"
-        if output.get("error_msg") is not None and "Recover" in output["error_msg"]:
+        if data.get("error_msg") is not None and "Recover" in data["error_msg"]:
             finish_reason = "recover_stop"
 
         return ChatCompletionResponseChoice(
diff --git a/tests/entrypoints/openai/test_max_streaming_tokens.py b/tests/entrypoints/openai/test_max_streaming_tokens.py
index 01b6346e0..3a772f919 100644
--- a/tests/entrypoints/openai/test_max_streaming_tokens.py
+++ b/tests/entrypoints/openai/test_max_streaming_tokens.py
@@ -412,7 +412,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
             "test_data": {
                 "request_id": "test_1",
                 "outputs": {
-                    "token_ids": [789],
+                    "token_ids": [123, 456, 789],
                     "text": "Edge case response",
                     "reasoning_content": None,
                     "tool_call": None,
@@ -424,7 +424,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
                 "previous_num_tokens": 1,
             },
             "mock_request": ChatCompletionRequest(
-                model="test", messages=[], return_token_ids=True, max_tokens=5, n=2
+                model="test", messages=[], return_token_ids=True, max_tokens=1, n=2
             ),
             "expected": {
                 "index": 1,
@@ -434,7 +434,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
                 "raw_prediction": None,
                 "num_cached_tokens": 0,
                 "num_image_tokens": 0,
-                "finish_reason": "stop",
+                "finish_reason": "length",
             },
         },
     ]
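
Editor's note (illustration only, not part of the patch): inside
_create_chat_completion_choice, the handler appears to receive the full
per-request result as `data` and the nested generation payload as `output`
(plausibly `output = data["outputs"]`). Top-level fields such as
"error_msg" were being read from the nested `output` dict, so the
"recover_stop" finish reason could never trigger. Below is a minimal,
runnable sketch of the pre-fix and post-fix lookups; the field values are
hypothetical, inferred from the diff and the test fixtures above.

    # Hypothetical result shape; field names mirror the diff, values are invented.
    data = {
        "request_id": "test_1",
        "error_msg": "Recover stop triggered",  # lives on the top-level dict
        "metrics": {"request_start_time": 1730342539.0},
        "outputs": {"token_ids": [123, 456, 789], "text": "Edge case response"},
    }
    output = data["outputs"]  # assumption: nested generation payload

    finish_reason = "length"
    # Pre-fix lookup: "error_msg" is absent from `output`, so this never fires.
    if output.get("error_msg") is not None and "Recover" in output["error_msg"]:
        finish_reason = "recover_stop"
    assert finish_reason == "length"

    # Post-fix lookup, matching the patched line in serving_chat.py.
    if data.get("error_msg") is not None and "Recover" in data["error_msg"]:
        finish_reason = "recover_stop"
    assert finish_reason == "recover_stop"

The test-fixture change follows the same finish-reason logic: with
max_tokens=1 and three generated token_ids, generation presumably halts at
the token budget, so the expected finish_reason becomes "length" instead of
"stop".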