From ad7bb52a289c030cdf4983301cb4e046f265d26e Mon Sep 17 00:00:00 2001
From: zhuzixuan <22173224954@stu.xidian.edu.cn>
Date: Tue, 29 Jul 2025 23:49:28 +0800
Subject: [PATCH] Fix the error raised when max_tokens=1 is passed (#3068)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix the error raised when max_tokens=1 is passed

---
 fastdeploy/output/token_processor.py        |  2 ++
 test/ci_use/EB_Lite/test_EB_Lite_serving.py | 22 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py
index 1f46a0952..38d048ec8 100644
--- a/fastdeploy/output/token_processor.py
+++ b/fastdeploy/output/token_processor.py
@@ -344,6 +344,7 @@ class TokenProcessor:
                     first_token_time=time.time() - task.inference_start_time,
                     time_in_queue=task.schedule_start_time - task.preprocess_end_time,
                     preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
+                    request_start_time=task.arrival_time,
                 )
 
                 self._record_first_token_metrics(task, current_time)
@@ -476,6 +477,7 @@ class TokenProcessor:
                     first_token_time=time.time() - task.inference_start_time,
                     time_in_queue=task.schedule_start_time - task.preprocess_end_time,
                     preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
+                    request_start_time=task.arrival_time,
                 )
 
                 self._record_first_token_metrics(task, current_time)
diff --git a/test/ci_use/EB_Lite/test_EB_Lite_serving.py b/test/ci_use/EB_Lite/test_EB_Lite_serving.py
index 9627ea773..beeb2d99d 100644
--- a/test/ci_use/EB_Lite/test_EB_Lite_serving.py
+++ b/test/ci_use/EB_Lite/test_EB_Lite_serving.py
@@ -696,3 +696,25 @@ def test_non_streaming_chat_completion_disable_chat_template(openai_client, caps
     assert hasattr(disabled_response, "choices")
     assert len(disabled_response.choices) > 0
     assert enabled_response.choices[0].message.content == disabled_response.choices[0].message.content
+
+
+def test_non_streaming_min_max_token_equals_one(openai_client, capsys):
+    """
+    Test chat completion when min_tokens equals max_tokens equals 1.
+    Verify that exactly one completion token is returned.
+    """
+    # Non-streaming chat request capped at a single output token
+    response = openai_client.chat.completions.create(
+        model="default",
+        messages=[{"role": "user", "content": "Hello"}],
+        max_tokens=1,
+        temperature=0.0,
+        stream=False,
+    )
+    assert hasattr(response, "choices")
+    assert len(response.choices) > 0
+    assert hasattr(response.choices[0], "message")
+    assert hasattr(response.choices[0].message, "content")
+    # Verify usage reports exactly 1 completion token
+    assert hasattr(response, "usage")
+    assert response.usage.completion_tokens == 1
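
For reference, a minimal client-side sketch of the scenario this patch fixes, mirroring the new test above. It assumes a locally running FastDeploy OpenAI-compatible server; the base_url and api_key values are hypothetical placeholders, and the model name "default" is taken from the test.

    import openai

    # Hypothetical endpoint; point this at wherever the FastDeploy server runs.
    client = openai.OpenAI(base_url="http://localhost:8188/v1", api_key="EMPTY")

    # Before this patch, capping the completion at a single token raised a
    # server-side error; with the fix the request succeeds and usage reports
    # exactly one completion token.
    response = client.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=1,
        temperature=0.0,
        stream=False,
    )
    print(response.choices[0].message.content)
    print(response.usage.completion_tokens)  # expected: 1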