Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-06 17:17:14 +08:00
Feature/online/vs think 20250813 (#3440)
* add stream
* fix ernie_vl_reasoning_parsers
* fix bug
@@ -344,33 +344,46 @@ class OpenAIServingCompletion:
                 logprobs_res = self._create_completion_logprobs(output_top_logprobs, request.logprobs, 0)
 
-            output_tokens[idx] += 1
-
-            if self.engine_client.data_processor.tool_parser_obj and not res["finished"]:
-                tool_delta_message = output["tool_delta_message"]
-                if tool_delta_message is None:
-                    continue
-                delta_message = CompletionResponseStreamChoice(
-                    index=idx,
-                    text=output["text"],
-                    completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
-                    tool_calls=tool_delta_message.tool_calls,
-                    reasoning_content=output.get("reasoning_content"),
-                    arrival_time=arrival_time,
-                    logprobs=logprobs_res,
-                )
-                if tool_delta_message.tool_calls:
-                    tool_called = True
+            base_kwargs = {
+                "index": idx,
+                "completion_token_ids": output.get("token_ids") if request.return_token_ids else None,
+                "arrival_time": arrival_time,
+                "logprobs": logprobs_res,
+            }
+            delta_message_kwargs = None
+            if not res["finished"]:
+                if "reasoning_delta_message" in output:
+                    reasoning_delta_message = output["reasoning_delta_message"]
+                    if reasoning_delta_message is not None:
+                        delta_message_kwargs = {
+                            **base_kwargs,
+                            "text": reasoning_delta_message.content or "",
+                            "reasoning_content": reasoning_delta_message.reasoning_content,
+                        }
+                elif "tool_delta_message" in output:
+                    tool_delta_message = output["tool_delta_message"]
+                    if tool_delta_message is not None:
+                        delta_message_kwargs = {
+                            **base_kwargs,
+                            "text": tool_delta_message.content or "",
+                            "tool_calls": tool_delta_message.tool_calls,
+                        }
+                        if tool_delta_message.tool_calls:
+                            tool_called = True
+                else:
+                    delta_message_kwargs = {
+                        **base_kwargs,
+                        "text": output["text"],
+                    }
             else:
-                delta_message = CompletionResponseStreamChoice(
-                    index=idx,
-                    text=output["text"],
-                    prompt_token_ids=None,
-                    completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
-                    tool_calls=None,
-                    raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
-                    reasoning_content=output.get("reasoning_content"),
-                    arrival_time=arrival_time,
-                    logprobs=logprobs_res,
-                )
+                delta_message_kwargs = {
+                    **base_kwargs,
+                    "text": output["text"],
+                }
+
+            if delta_message_kwargs is None:
+                continue
+            delta_message = CompletionResponseStreamChoice(**delta_message_kwargs)
 
             choices.append(delta_message)
+            output_tokens[idx] += 1