[BugFix] fix max streaming tokens invalid (#3789)

This commit is contained in:
ltd0924
2025-09-02 13:57:32 +08:00
committed by GitHub
parent 7e751c93ae
commit bf0cf5167a
3 changed files with 282 additions and 9 deletions

View File

@@ -183,6 +183,8 @@ class OpenAIServingChat:
else (request.metadata or {}).get("max_streaming_response_tokens", 1)
) # directly passed & passed in metadata
max_streaming_response_tokens = max(1, max_streaming_response_tokens)
enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
if enable_thinking is None:
enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
@@ -370,11 +372,6 @@ class OpenAIServingChat:
api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
choices = []
if choices:
chunk.choices = choices
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
choices = []
if include_usage:
completion_tokens = previous_num_tokens
usage = UsageInfo(