diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index 73459c9d4..b7e9555ce 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -298,11 +298,6 @@ class OpenAIServingChat:
                             api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
                         choices = []
 
-                if choices:
-                    chunk.choices = choices
-                    yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
-                    choices = []
-
                 if include_usage:
                     completion_tokens = previous_num_tokens
                     usage = UsageInfo(
diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py
index 0a422d0fa..b4160366f 100644
--- a/fastdeploy/entrypoints/openai/serving_completion.py
+++ b/fastdeploy/entrypoints/openai/serving_completion.py
@@ -352,10 +352,6 @@ class OpenAIServingCompletion:
                         )
                         yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
                     api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
-            if choices:
-                chunk.choices = choices
-                yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
-                choices = []
 
         except Exception as e:
             yield f"data: {ErrorResponse(message=str(e), code=400).model_dump_json(exclude_unset=True)}\n\n"