diff --git a/fastdeploy/reasoning/qwen3_reasoning_parsers.py b/fastdeploy/reasoning/qwen3_reasoning_parsers.py index 122291dab..9e3aae592 100644 --- a/fastdeploy/reasoning/qwen3_reasoning_parsers.py +++ b/fastdeploy/reasoning/qwen3_reasoning_parsers.py @@ -17,7 +17,7 @@ from collections.abc import Sequence from typing import Optional, Union from fastdeploy.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaMessage) + DeltaMessage) from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager @@ -67,47 +67,47 @@ class Qwen3ReasoningParser(ReasoningParser): - 'abc' goes to reasoning_content - 'xyz' goes to content """ - # Skip single special tokens if len(delta_token_ids) == 1 and (delta_token_ids[0] in [ self.think_start_token_id, self.think_end_token_id ]): return "", "" - if self.think_start_token_id in previous_token_ids: - if self.think_end_token_id in delta_token_ids: - # in previous, in delta, - # extract reasoning content + # in delta + if self.think_end_token_id in delta_token_ids: + # in delta, in delta, extract reasoning content + if self.think_start_token_id in delta_token_ids: + start_index = delta_text.find(self.think_start_token) + end_index = delta_token_ids.find(self.think_end_token) + reasoning_content = delta_text[start_index + + len(self.think_start_token + ):end_index] + content = delta_text[end_index+len(self.think_end_token):] + return reasoning_content, content + # in previous, in delta, + else: end_index = delta_text.find(self.think_end_token) reasoning_content = delta_text[:end_index] content = delta_text[end_index + len(self.think_end_token):] content = content if content else None return reasoning_content, content - elif self.think_end_token_id in previous_token_ids: - # in previous, in previous, - # reasoning content continues - return "", delta_text - else: - # in previous, no in previous or delta, - # reasoning content continues - return delta_text, "" - elif self.think_start_token_id in delta_token_ids: - if self.think_end_token_id in delta_token_ids: - # in delta, in delta, extract reasoning content - start_index = delta_text.find(self.think_start_token) - end_index = delta_text.find(self.think_end_token) - reasoning_content = delta_text[start_index + - len(self.think_start_token - ):end_index] - content = delta_text[end_index + len(self.think_end_token):] - content = content if content else None - return reasoning_content, content - else: - # in delta, no in delta, - # reasoning content continues - return delta_text, "" - else: - # thinking is disabled, just content + # in previous reasoning content continues + elif self.think_end_token_id in previous_token_ids: return "", delta_text + # in previous + elif self.think_start_token_id in previous_token_ids: + return delta_text,"" + # in delta + elif self.think_start_token_id in delta_token_ids: + start_index=delta_text.find(self.think_start_token) + reasoning_content=delta_text[start_index + len(self.think_start_token):] + content = "" + return reasoning_content, content + else: + return delta_text, "" + + + + def extract_reasoning_content( self, model_output: str, request: ChatCompletionRequest @@ -115,31 +115,47 @@ class Qwen3ReasoningParser(ReasoningParser): """ Extract reasoning content from the model output. - For text abcxyz: - - 'abc' goes to reasoning_content - - 'xyz' goes to content + 支持两种格式: + 1. abcxyz - 标准格式 + 2. abcxyz - 缺少起始标签的格式 Returns: tuple[Optional[str], Optional[str]]: reasoning content and content """ - # Check if the model output contains the and tokens. - if (self.think_start_token not in model_output - or self.think_end_token not in model_output): - return None, model_output - # Check if the is present in the model output, remove it - # if it is present. - model_output_parts = model_output.partition(self.think_start_token) - model_output = model_output_parts[2] if model_output_parts[ - 1] else model_output_parts[0] - # Check if the model output contains the tokens. - # If the end token is not found, return the model output as is. + # 检查是否包含结束标签 if self.think_end_token not in model_output: return None, model_output - # Extract reasoning content from the model output. - reasoning_content, _, content = model_output.partition( - self.think_end_token) + # 检查是否有起始标签 + if self.think_start_token in model_output: + # 标准格式:contentanswer + if (self.think_start_token not in model_output + or self.think_end_token not in model_output): + return None, model_output + # Check if the is present in the model output, remove it + # if it is present. + model_output_parts = model_output.partition(self.think_start_token) + model_output = model_output_parts[2] if model_output_parts[ + 1] else model_output_parts[0] + # Check if the model output contains the tokens. + # If the end token is not found, return the model output as is. + if self.think_end_token not in model_output: + return None, model_output - final_content = content or None - return reasoning_content, final_content \ No newline at end of file + # Extract reasoning content from the model output. + reasoning_content, _, content = model_output.partition( + self.think_end_token) + + final_content = content or None + return reasoning_content, final_content + else: + # 缺少起始标签的格式:contentanswer + parts = model_output.split(self.think_end_token, 1) + + if len(parts) == 2: + reasoning_content = parts[0].strip() + final_content = parts[1].strip() if parts[1].strip() else None + return reasoning_content, final_content + + return None, model_output