[BugFix] fix max streaming tokens invalid (#3799)

* Update serving_chat.py

* Update serving_completion.py

* Update serving_completion.py
This commit is contained in:
ltd0924
2025-09-02 21:03:13 +08:00
committed by GitHub
parent 0f42771a84
commit cd09384a14
2 changed files with 2 additions and 10 deletions

View File

@@ -331,6 +331,7 @@ class OpenAIServingCompletion:
if request.max_streaming_response_tokens is not None
else (request.suffix or {}).get("max_streaming_response_tokens", 1)
) # directly passed, or passed in suffix
max_streaming_response_tokens = max(max_streaming_response_tokens, 1)
choices = []
chunk = CompletionStreamResponse(
id=request_id,
@@ -461,10 +462,6 @@ class OpenAIServingCompletion:
)
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
if choices:
chunk.choices = choices
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
choices = []
except Exception as e:
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")