diff --git a/fastdeploy/entrypoints/openai/protocol.py b/fastdeploy/entrypoints/openai/protocol.py
index de6ec4fa5..845786704 100644
--- a/fastdeploy/entrypoints/openai/protocol.py
+++ b/fastdeploy/entrypoints/openai/protocol.py
@@ -122,7 +122,7 @@ class ChatCompletionResponseChoice(BaseModel):
     """
     index: int
     message: ChatMessage
-    finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
+    finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]]
 
 
 class ChatCompletionResponse(BaseModel):
diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index 6d75b6b57..876adbf87 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -222,7 +222,7 @@ class OpenAIServingChat:
                         choice.finish_reason = "length"
 
                     if res.get("error_msg") is not None and "Recover" in res["error_msg"]:
-                        choice.finish_reason = "length"
+                        choice.finish_reason = "recover_stop"
 
                     if request.metadata is not None and request.metadata.get("training", False) and delta_text != "":
                         choice.delta.token_ids = output["token_ids"]
@@ -340,7 +340,7 @@ class OpenAIServingChat:
                 choice.finish_reason = "length"
 
             if final_res.get("error_msg") is not None and "Recover" in final_res["error_msg"]:
-                choice.finish_reason = "length"
+                choice.finish_reason = "recover_stop"
             choices.append(choice)
 
         num_prompt_tokens = len(prompt_token_ids)