[BugFix] fix max streaming tokens invalid (#3799)

* Update serving_chat.py

* Update serving_completion.py

* Update serving_completion.py
This commit is contained in:
ltd0924
2025-09-02 21:03:13 +08:00
committed by GitHub
parent 0f42771a84
commit cd09384a14
2 changed files with 2 additions and 10 deletions

View File

@@ -331,6 +331,7 @@ class OpenAIServingCompletion:
if request.max_streaming_response_tokens is not None
else (request.suffix or {}).get("max_streaming_response_tokens", 1)
) # directly passed, or passed in suffix
max_streaming_response_tokens = max(max_streaming_response_tokens, 1)
choices = []
chunk = CompletionStreamResponse(
id=request_id,
@@ -461,10 +462,6 @@ class OpenAIServingCompletion:
)
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
if choices:
chunk.choices = choices
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
choices = []
except Exception as e:
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")