Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 00:33:03 +08:00
[BugFix] fix max streaming tokens invalid (#3799)
* Update serving_chat.py
* Update serving_completion.py
* Update serving_completion.py
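The fix guards against non-positive max_streaming_response_tokens values (note the max(..., 1) clamp in the first hunk below). For context, the field can reach the server two ways: passed directly on the request, or inside metadata. A minimal client-side sketch, assuming a local FastDeploy OpenAI-compatible endpoint; the URL and model name are placeholders:

import requests

# Assumed local endpoint; adjust host, port, and model for your deployment.
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "default",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": True,
        # Directly passed field; per the hunk below it takes precedence
        # over the metadata fallback.
        "max_streaming_response_tokens": 4,
        "metadata": {"max_streaming_response_tokens": 2},
    },
    stream=True,
)
for line in resp.iter_lines():
    if line:
        print(line.decode())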
@@ -182,7 +182,7 @@ class OpenAIServingChat:
            if request.max_streaming_response_tokens is not None
            else (request.metadata or {}).get("max_streaming_response_tokens", 1)
        )  # directly passed & passed in metadata

        max_streaming_response_tokens = max(max_streaming_response_tokens, 1)
        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
        if enable_thinking is None:
            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
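To see the resolution order in isolation, here is a runnable sketch of the logic above; ChatRequest is a simplified stand-in for FastDeploy's actual request model, and the max(..., 1) clamp appears to be the core of this fix:

from dataclasses import dataclass
from typing import Optional


@dataclass
class ChatRequest:
    # Simplified stand-in; the real request model has many more fields.
    max_streaming_response_tokens: Optional[int] = None
    metadata: Optional[dict] = None


def resolve_max_streaming_response_tokens(request: ChatRequest) -> int:
    # Prefer the directly passed field, fall back to metadata, default to 1.
    value = (
        request.max_streaming_response_tokens
        if request.max_streaming_response_tokens is not None
        else (request.metadata or {}).get("max_streaming_response_tokens", 1)
    )
    # The clamp: non-positive values become 1 instead of breaking streaming.
    return max(value, 1)


assert resolve_max_streaming_response_tokens(ChatRequest()) == 1
assert resolve_max_streaming_response_tokens(ChatRequest(max_streaming_response_tokens=0)) == 1
assert resolve_max_streaming_response_tokens(ChatRequest(metadata={"max_streaming_response_tokens": 5})) == 5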
@@ -370,11 +370,6 @@ class OpenAIServingChat:
            api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
            choices = []

            if choices:
                chunk.choices = choices
                yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
                choices = []

            if include_usage:
                completion_tokens = previous_num_tokens
                usage = UsageInfo(
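The second hunk touches the buffer-and-flush loop that max_streaming_response_tokens controls: streamed choices accumulate and are emitted once the buffer fills. A hedged sketch of that general pattern, with all names as illustrative stand-ins rather than FastDeploy's actual API:

import json
from typing import Iterable, Iterator


def stream_chunks(tokens: Iterable[str], max_streaming_response_tokens: int) -> Iterator[str]:
    # Accumulate per-token deltas and flush every N of them as one SSE event.
    choices = []
    for token in tokens:
        choices.append({"delta": {"content": token}})
        if len(choices) >= max_streaming_response_tokens:
            yield f"data: {json.dumps({'choices': choices})}\n\n"
            choices = []
    if choices:
        # Final flush of any remainder before closing the stream.
        yield f"data: {json.dumps({'choices': choices})}\n\n"
    yield "data: [DONE]\n\n"


# With a clamped value of 1 (the minimum after the fix), every token is
# flushed immediately; larger values batch several deltas per event.
for event in stream_chunks(["He", "llo", "!"], max_streaming_response_tokens=2):
    print(event, end="")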