[Feature] add tool parser (#3483)

* add tool parser

* add x1 enable_thinking

* restart ci

* fix vl reasoning parser

* modify call style

* modify call style

* add offline enablethinking

* fix completion

* fix

* fix unit test

* fix unit test

* fix unit test

* fix vl reasoning parser

* fix vl reasoning parser
This commit is contained in:
luukunn
2025-08-21 17:25:44 +08:00
committed by GitHub
parent 466cbb5a99
commit 371fb3f853
14 changed files with 197 additions and 222 deletions

View File

@@ -312,7 +312,7 @@ class OpenAIServingCompletion:
output_tokens = [0] * num_choices
inference_start_time = [0] * num_choices
first_iteration = [True] * num_choices
tool_called = False
tool_called = [False] * num_choices
max_streaming_response_tokens = (
request.max_streaming_response_tokens
if request.max_streaming_response_tokens is not None
@@ -386,41 +386,32 @@ class OpenAIServingCompletion:
logprobs_res = self._create_completion_logprobs(output_top_logprobs, request.logprobs, 0)
output_tokens[idx] += 1
if self.engine_client.data_processor.tool_parser_obj and not res["finished"]:
tool_delta_message = output["tool_delta_message"]
if tool_delta_message is None:
delta_message = CompletionResponseStreamChoice(
index=idx,
text=output["text"],
prompt_token_ids=None,
completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
tool_calls=None,
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
reasoning_content="",
arrival_time=arrival_time,
logprobs=logprobs_res,
)
if not res["finished"] and "delta_message" in output:
delta_message_output = output["delta_message"]
if delta_message_output is None:
continue
delta_message = CompletionResponseStreamChoice(
index=idx,
text=output["text"],
completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
tool_calls=tool_delta_message.tool_calls,
reasoning_content=output.get("reasoning_content"),
arrival_time=arrival_time,
logprobs=logprobs_res,
)
if tool_delta_message.tool_calls:
tool_called = True
else:
delta_message = CompletionResponseStreamChoice(
index=idx,
text=output["text"],
prompt_token_ids=None,
completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
tool_calls=None,
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
reasoning_content=output.get("reasoning_content"),
arrival_time=arrival_time,
logprobs=logprobs_res,
)
delta_message.text = delta_message_output.content or ""
delta_message.reasoning_content = delta_message_output.reasoning_content or ""
delta_message.tool_calls = delta_message_output.tool_calls
choices.append(delta_message)
output_tokens[idx] += 1
if res["finished"]:
choices[-1].finish_reason = self.calc_finish_reason(
request.max_tokens, output_tokens[idx], output, tool_called
request.max_tokens, output_tokens[idx], output, tool_called[idx]
)
send_idx = output.get("send_idx")
# 只有当 send_idx 明确为 0 时才记录日志