Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-06 00:57:33 +08:00
[BugFix] [CP] fix max streaming tokens invalid (#3798)
* Update serving_chat.py
* Update serving_completion.py
@@ -148,7 +148,7 @@ class OpenAIServingChat:
     if request.max_streaming_response_tokens is not None
     else (request.metadata or {}).get("max_streaming_response_tokens", 1)
 ) # dierctly passed & passed in metadata
+max_streaming_response_tokens = max(1, max_streaming_response_tokens)
 enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
 if enable_thinking is None:
     enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
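The added line clamps the resolved value to at least 1. Below is a minimal, standalone sketch of that resolve-then-clamp pattern; ChatRequest is a hypothetical stand-in for the real request model, not FastDeploy's class.

from dataclasses import dataclass
from typing import Optional


@dataclass
class ChatRequest:
    # Hypothetical stand-in for the real request model.
    max_streaming_response_tokens: Optional[int] = None
    metadata: Optional[dict] = None


def resolve_max_streaming_tokens(request: ChatRequest) -> int:
    # Prefer the explicitly passed field, fall back to metadata, default to 1.
    value = (
        request.max_streaming_response_tokens
        if request.max_streaming_response_tokens is not None
        else (request.metadata or {}).get("max_streaming_response_tokens", 1)
    )
    # The fix in this commit: never let the value drop below 1.
    return max(1, value)


assert resolve_max_streaming_tokens(ChatRequest()) == 1
assert resolve_max_streaming_tokens(ChatRequest(metadata={"max_streaming_response_tokens": 0})) == 1
assert resolve_max_streaming_tokens(ChatRequest(max_streaming_response_tokens=8)) == 8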
@@ -316,11 +316,6 @@ class OpenAIServingChat:
 api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
 choices = []
 
-if choices:
-    chunk.choices = choices
-    yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
-    choices = []
-
 if include_usage:
     completion_tokens = previous_num_tokens
     usage = UsageInfo(
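The removed lines are a trailing flush of any still-buffered choices, issued after the chunk had already been logged as the last send. For orientation, here is a simplified, hedged sketch of the buffer-and-flush pattern that max_streaming_response_tokens drives in a streaming generator; it is illustrative only, not the actual OpenAIServingChat code.

import json
from typing import Iterable, Iterator


def stream_batched(deltas: Iterable[dict], max_streaming_response_tokens: int) -> Iterator[str]:
    # Illustrative buffering loop: collect deltas and emit one SSE line per batch.
    batch_size = max(1, max_streaming_response_tokens)  # the clamp introduced by this commit
    choices = []
    for delta in deltas:
        choices.append(delta)
        if len(choices) >= batch_size:
            yield f"data: {json.dumps({'choices': choices})}\n\n"
            choices = []
    if choices:  # final flush of whatever is left when the stream ends
        yield f"data: {json.dumps({'choices': choices})}\n\n"


# Example: a batch size of 2 turns three deltas into two SSE lines.
print(list(stream_batched([{"i": 0}, {"i": 1}, {"i": 2}], max_streaming_response_tokens=2)))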
@@ -285,6 +285,7 @@ class OpenAIServingCompletion:
     if request.max_streaming_response_tokens is not None
     else (request.suffix or {}).get("max_streaming_response_tokens", 1)
 ) # dierctly passed & passed in suffix
+max_streaming_response_tokens = max(max_streaming_response_tokens, 1)
 choices = []
 chunk = CompletionStreamResponse(
     id=request_id,
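As the comment in this hunk notes, the value can be passed directly on the request or inside suffix (metadata for the chat endpoint). A hedged client-side example follows, assuming an OpenAI-compatible completions endpoint at http://localhost:8000/v1/completions; host, port, and model name are placeholders, not taken from this commit.

import requests

URL = "http://localhost:8000/v1/completions"  # placeholder endpoint

direct = {
    "model": "default",   # placeholder model name
    "prompt": "Hello",
    "stream": True,
    "max_streaming_response_tokens": 4,               # directly passed
}

via_suffix = {
    "model": "default",
    "prompt": "Hello",
    "stream": True,
    "suffix": {"max_streaming_response_tokens": 4},   # passed in suffix
}

# After this commit, a value of 0 or less in either place is clamped to 1 server-side.
for payload in (direct, via_suffix):
    with requests.post(URL, json=payload, stream=True) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            if line:
                print(line)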
@@ -418,10 +419,6 @@ class OpenAIServingCompletion:
     )
     yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
 api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
-if choices:
-    chunk.choices = choices
-    yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
-    choices = []
 
 except Exception as e:
     yield f"data: {ErrorResponse(message=str(e), code=400).model_dump_json(exclude_unset=True)}\n\n"
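The except branch in the context lines turns a server-side failure into one final SSE data line instead of dropping the connection silently. Below is a hedged client-side sketch for consuming such a stream; the [DONE] sentinel and the exact error payload shape are assumptions in the style of OpenAI-compatible streaming, not confirmed by this diff.

import json


def iter_stream(lines):
    # Parse "data: ..." SSE lines, yield decoded chunks, and surface error chunks.
    for raw in lines:
        if not raw.startswith("data: "):
            continue
        body = raw[len("data: "):].strip()
        if body == "[DONE]":  # assumed end-of-stream sentinel
            break
        chunk = json.loads(body)
        if "code" in chunk and "message" in chunk:  # shape of the ErrorResponse line above
            raise RuntimeError(f"stream error {chunk['code']}: {chunk['message']}")
        yield chunk


# Example with canned lines, including an error chunk like the one yielded above.
sample = [
    'data: {"choices": [{"text": "Hi"}]}',
    'data: {"message": "boom", "code": 400}',
]
try:
    for chunk in iter_stream(sample):
        print(chunk["choices"][0]["text"])
except RuntimeError as err:
    print(err)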