diff --git a/fastdeploy/reasoning/qwen3_reasoning_parsers.py b/fastdeploy/reasoning/qwen3_reasoning_parsers.py
index 122291dab..9e3aae592 100644
--- a/fastdeploy/reasoning/qwen3_reasoning_parsers.py
+++ b/fastdeploy/reasoning/qwen3_reasoning_parsers.py
@@ -17,7 +17,7 @@ from collections.abc import Sequence
from typing import Optional, Union
from fastdeploy.entrypoints.openai.protocol import (ChatCompletionRequest,
- DeltaMessage)
+ DeltaMessage)
from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager
@@ -67,47 +67,47 @@ class Qwen3ReasoningParser(ReasoningParser):
         - 'abc' goes to reasoning_content
         - 'xyz' goes to content
         """
-        # Skip single special tokens
        if len(delta_token_ids) == 1 and (delta_token_ids[0] in [
                self.think_start_token_id, self.think_end_token_id
        ]):
            return "", ""
-        if self.think_start_token_id in previous_token_ids:
-            if self.think_end_token_id in delta_token_ids:
-                # in previous, in delta,
-                # extract reasoning content
+        # </think> arrives in this delta
+        if self.think_end_token_id in delta_token_ids:
+            # <think> and </think> both in this delta: reasoning sits
+            # between the tags, response content follows the end tag.
+            if self.think_start_token_id in delta_token_ids:
+                start_index = delta_text.find(self.think_start_token)
+                end_index = delta_text.find(self.think_end_token)
+                reasoning_content = delta_text[start_index +
+                                               len(self.think_start_token
+                                                   ):end_index]
+                content = delta_text[end_index + len(self.think_end_token):]
+                return reasoning_content, content or None
+            # <think> seen in a previous chunk, </think> in this delta
+            else:
                end_index = delta_text.find(self.think_end_token)
                reasoning_content = delta_text[:end_index]
                content = delta_text[end_index + len(self.think_end_token):]
                content = content if content else None
                return reasoning_content, content
-        elif self.think_end_token_id in previous_token_ids:
-            # in previous, in previous,
-            # reasoning content continues
-            return "", delta_text
-        else:
-            # in previous, no in previous or delta,
-            # reasoning content continues
-            return delta_text, ""
-        elif self.think_start_token_id in delta_token_ids:
-            if self.think_end_token_id in delta_token_ids:
-                # in delta, in delta, extract reasoning content
-                start_index = delta_text.find(self.think_start_token)
-                end_index = delta_text.find(self.think_end_token)
-                reasoning_content = delta_text[start_index +
-                                               len(self.think_start_token
-                                                   ):end_index]
-                content = delta_text[end_index + len(self.think_end_token):]
-                content = content if content else None
-                return reasoning_content, content
-            else:
-                # in delta, no in delta,
-                # reasoning content continues
-                return delta_text, ""
-        else:
-            # thinking is disabled, just content
+        # </think> seen in a previous chunk: reasoning is finished
+        elif self.think_end_token_id in previous_token_ids:
            return "", delta_text
+        # <think> seen in a previous chunk, no </think> yet: still reasoning
+        elif self.think_start_token_id in previous_token_ids:
+            return delta_text, ""
+        # <think> starts in this delta: everything after it is reasoning
+        elif self.think_start_token_id in delta_token_ids:
+            start_index = delta_text.find(self.think_start_token)
+            reasoning_content = delta_text[start_index + len(self.think_start_token):]
+            content = ""
+            return reasoning_content, content
+        else:
+            return delta_text, ""
+
+
+
+
def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest
@@ -115,31 +115,47 @@ class Qwen3ReasoningParser(ReasoningParser):
        """
        Extract reasoning content from the model output.
-        For text abcxyz:
-        - 'abc' goes to reasoning_content
-        - 'xyz' goes to content
+        Supports two formats:
+        1. <think>abc</think>xyz  - standard format
+        2. abc</think>xyz         - format missing the opening <think> tag
        Returns:
            tuple[Optional[str], Optional[str]]: reasoning content and content
        """
-        # Check if the model output contains the and tokens.
-        if (self.think_start_token not in model_output
-                or self.think_end_token not in model_output):
-            return None, model_output
-        # Check if the is present in the model output, remove it
-        # if it is present.
-        model_output_parts = model_output.partition(self.think_start_token)
-        model_output = model_output_parts[2] if model_output_parts[
-            1] else model_output_parts[0]
-        # Check if the model output contains the tokens.
-        # If the end token is not found, return the model output as is.
+        # Without a </think> end tag, treat the whole output as content.
        if self.think_end_token not in model_output:
            return None, model_output
-        # Extract reasoning content from the model output.
-        reasoning_content, _, content = model_output.partition(
-            self.think_end_token)
+        # Does the output also carry the opening <think> tag?
+        if self.think_start_token in model_output:
+            # Standard format: <think>reasoning</think>answer
+            # (Both tags are known to be present at this point: the end
+            # tag was checked above and the start tag by this branch,
+            # so no additional membership guard is required here.)
+            # Strip everything up to and including the <think> start tag
+            # if it is present.
+            model_output_parts = model_output.partition(self.think_start_token)
+            model_output = model_output_parts[2] if model_output_parts[
+                1] else model_output_parts[0]
+            # The </think> tag must appear after <think>; if it only
+            # occurred before the start tag, return the output as is.
+            if self.think_end_token not in model_output:
+                return None, model_output
-        final_content = content or None
-        return reasoning_content, final_content
\ No newline at end of file
+            # Extract reasoning content from the model output.
+            reasoning_content, _, content = model_output.partition(
+                self.think_end_token)
+
+            final_content = content or None
+            return reasoning_content, final_content
+        else:
+            # Missing-start-tag format: reasoning</think>answer
+            parts = model_output.split(self.think_end_token, 1)
+
+            if len(parts) == 2:
+                reasoning_content = parts[0].strip()
+                final_content = parts[1].strip() or None
+                return reasoning_content, final_content
+
+            return None, model_output