From 82bd7e5db4da1e2c8c4e87e3377b7708312064d6 Mon Sep 17 00:00:00 2001
From: kxz2002 <115912648+kxz2002@users.noreply.github.com>
Date: Fri, 31 Oct 2025 10:42:19 +0800
Subject: [PATCH] [BugFix] Fix finish reason in _create_chat_completion_choice
 (#4582)

* fix n_param in _create_chat_completion_choice
* fix unit test
* fix final_res
* modify unit tests
---
 fastdeploy/entrypoints/openai/serving_chat.py         | 4 ++--
 tests/entrypoints/openai/test_max_streaming_tokens.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index b18fc5102..cf11ba8ff 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -621,7 +621,7 @@ class OpenAIServingChat:
 
         if output is not None and output.get("metrics") and output["metrics"].get("request_start_time"):
             work_process_metrics.e2e_request_latency.observe(
-                time.time() - output.get("metrics").get("request_start_time")
+                time.time() - data.get("metrics").get("request_start_time")
             )
         message = ChatMessage(
             role="assistant",
@@ -655,7 +655,7 @@ class OpenAIServingChat:
                 finish_reason = "tool_calls"
             else:
                 finish_reason = "length"
-        if output.get("error_msg") is not None and "Recover" in output["error_msg"]:
+        if data.get("error_msg") is not None and "Recover" in data["error_msg"]:
             finish_reason = "recover_stop"
 
         return ChatCompletionResponseChoice(
diff --git a/tests/entrypoints/openai/test_max_streaming_tokens.py b/tests/entrypoints/openai/test_max_streaming_tokens.py
index 01b6346e0..3a772f919 100644
--- a/tests/entrypoints/openai/test_max_streaming_tokens.py
+++ b/tests/entrypoints/openai/test_max_streaming_tokens.py
@@ -412,7 +412,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
             "test_data": {
                 "request_id": "test_1",
                 "outputs": {
-                    "token_ids": [789],
+                    "token_ids": [123, 456, 789],
                     "text": "Edge case response",
                     "reasoning_content": None,
                     "tool_call": None,
@@ -424,7 +424,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
                 "previous_num_tokens": 1,
             },
             "mock_request": ChatCompletionRequest(
-                model="test", messages=[], return_token_ids=True, max_tokens=5, n=2
+                model="test", messages=[], return_token_ids=True, max_tokens=1, n=2
             ),
             "expected": {
                 "index": 1,
@@ -434,7 +434,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
                 "raw_prediction": None,
                 "num_cached_tokens": 0,
                 "num_image_tokens": 0,
-                "finish_reason": "stop",
+                "finish_reason": "length",
             },
         },
     ]
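
Editor's note (illustration only, not part of the patch): inside
_create_chat_completion_choice, the handler appears to receive the full
per-request result as `data` and the nested generation payload as `output`
(plausibly `output = data["outputs"]`). Top-level fields such as
"error_msg" were being read from the nested `output` dict, so the
"recover_stop" finish reason could never trigger. Below is a minimal,
runnable sketch of the pre-fix and post-fix lookups; the field values are
hypothetical, inferred from the diff and the test fixtures above.

    # Hypothetical result shape; field names mirror the diff, values are invented.
    data = {
        "request_id": "test_1",
        "error_msg": "Recover stop triggered",  # lives on the top-level dict
        "metrics": {"request_start_time": 1730342539.0},
        "outputs": {"token_ids": [123, 456, 789], "text": "Edge case response"},
    }
    output = data["outputs"]  # assumption: nested generation payload

    finish_reason = "length"
    # Pre-fix lookup: "error_msg" is absent from `output`, so this never fires.
    if output.get("error_msg") is not None and "Recover" in output["error_msg"]:
        finish_reason = "recover_stop"
    assert finish_reason == "length"

    # Post-fix lookup, matching the patched line in serving_chat.py.
    if data.get("error_msg") is not None and "Recover" in data["error_msg"]:
        finish_reason = "recover_stop"
    assert finish_reason == "recover_stop"

The test-fixture change follows the same finish-reason logic: with
max_tokens=1 and three generated token_ids, generation presumably halts at
the token budget, so the expected finish_reason becomes "length" instead of
"stop".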