Merge branch 'feature/online/vs_think_20250813' into release/2.1

This commit is contained in:
Jiang-Jia-Jun
2025-08-14 17:13:36 +08:00
committed by GitHub
23 changed files with 1056 additions and 41 deletions

View File

@@ -141,6 +141,7 @@ class OpenAIServingChat:
previous_num_tokens = 0
num_prompt_tokens = 0
num_choices = 1
tool_called = False
max_streaming_response_tokens = (
request.max_streaming_response_tokens
if request.max_streaming_response_tokens is not None
@@ -245,20 +246,28 @@ class OpenAIServingChat:
output = res["outputs"]
delta_text = output["text"]
output_top_logprobs = output["top_logprobs"]
previous_num_tokens += len(output["token_ids"])
logprobs_res: Optional[LogProbs] = None
if request.logprobs and output_top_logprobs is not None:
logprobs_res = self._create_chat_logprobs(
output_top_logprobs, request.logprobs, request.top_logprobs
)
previous_num_tokens += len(output["token_ids"])
delta_message = DeltaMessage(
content=delta_text,
reasoning_content=output.get("reasoning_content"),
prompt_token_ids=None,
completion_token_ids=None,
tool_calls=output.get("tool_call_content", []),
)
if self.engine_client.data_processor.tool_parser_obj and not res["finished"]:
tool_delta_message = output["tool_delta_message"]
if tool_delta_message is None:
continue
delta_message = tool_delta_message
delta_message.reasoning_content = output.get("reasoning_content")
if delta_message.tool_calls:
tool_called = True
else:
delta_message = DeltaMessage(
content=delta_text,
reasoning_content=output.get("reasoning_content"),
prompt_token_ids=None,
completion_token_ids=None,
tool_calls=None,
)
choice = ChatCompletionResponseStreamChoice(
index=0,
@@ -276,10 +285,7 @@ class OpenAIServingChat:
max_tokens = request.max_completion_tokens or request.max_tokens
if has_no_token_limit or previous_num_tokens != max_tokens:
choice.finish_reason = "stop"
if (
self.engine_client.reasoning_parser == "ernie_x1"
and output.get("finish_reason", "") == "tool_calls"
):
if tool_called:
choice.finish_reason = "tool_calls"
else:
choice.finish_reason = "length"
@@ -419,7 +425,7 @@ class OpenAIServingChat:
role="assistant",
content=output["text"],
reasoning_content=output.get("reasoning_content"),
tool_calls=output.get("tool_call_content"),
tool_calls=output.get("tool_call"),
prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
completion_token_ids=completion_token_ids if request.return_token_ids else None,
text_after_process=text_after_process if request.return_token_ids else None,
@@ -439,7 +445,7 @@ class OpenAIServingChat:
max_tokens = request.max_completion_tokens or request.max_tokens
if has_no_token_limit or previous_num_tokens != max_tokens:
choice.finish_reason = "stop"
if self.engine_client.reasoning_parser == "ernie_x1" and output.get("finish_reason", "") == "tool_calls":
if output.get("tool_call"):
choice.finish_reason = "tool_calls"
else:
choice.finish_reason = "length"