Release/2.1 (#3414)

* Pre ce modified (#3335) (#3360) * Pre ce modified (#3335) * update * update * fix * fix * update * update * update * fix * update * update * update * add ut fix pr(3367) * [Bug Fix] Fix V1 video bug (#3387) * fix stopseq error info (#3342) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [BugFix] Fix default log level of paddleformers (#3377) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [Polish Code] Remove useless notes * feat(log):add_request_and_response_log (#3392) * Optimize CI execution workflow. (#3371) (#3384) * fix * [BugFix] fix control signal release failed (#3374) * [BugFix] * [BugFix] * [BugFix] * [BugFix] * fix * fix --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> * Revert "Merge branch 'feature/online/vs_think_20250813' into release/2.1" This reverts commit 02596fc537, reversing changes made to 03347626a6. * [XPU] Fixed the issue of performance degradation caused by enabling ENABLE_V1_KVCACHE_SCHEDULER (#3393) * fix v1 schedule oom bug * fix v1 schedule oom bug * [BugFix] fix ErnieProcessor not set raw_prediction (#3401) * [Doc]Release fastdeploy-xpu 2.1.0 (#3407) * fix v1 schedule oom bug * fix v1 schedule oom bug * update release note * [Doc]Release fastdeploy-xpu 2.0.3 (#3408) * fix v1 schedule oom bug * fix v1 schedule oom bug * update release note * update info --------- Co-authored-by: YUNSHEN XIE <1084314248@qq.com> Co-authored-by: ming1753 <61511741+ming1753@users.noreply.github.com> Co-authored-by: JYChen <zoooo0820@qq.com> Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com> Co-authored-by: xiaolei373 <zley373@gmail.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com> Co-authored-by: yinwei <yinwei_hust@163.com> Co-authored-by: memoryCoderC <1137889088@qq.com>
2025-10-06 17:17:14 +08:00 · 2025-08-14 20:53:47 +08:00
parent e11331927f
commit 132a8ef425
30 changed files with 132 additions and 1068 deletions
--- a/fastdeploy/input/ernie_processor.py
+++ b/fastdeploy/input/ernie_processor.py
@@ -43,7 +43,7 @@ class ErnieProcessor(BaseDataProcessor):
        pad_token_id (int): 存储填充符号的token ID。
    """

-    def __init__(self, model_name_or_path, reasoning_parser_obj=None, tool_parser_obj=None):
+    def __init__(self, model_name_or_path, reasoning_parser_obj=None):

        self.model_name_or_path = model_name_or_path
        data_processor_logger.info(f"model_name_or_path: {model_name_or_path}")
@@ -63,7 +63,6 @@ class ErnieProcessor(BaseDataProcessor):
        self.reasoning_parser = None
        if reasoning_parser_obj:
            self.reasoning_parser = reasoning_parser_obj(self.tokenizer)
-        self.tool_parser_obj = tool_parser_obj

    def _init_config(self):
        self.use_hf_tokenizer = int(envs.FD_USE_HF_TOKENIZER) == 1
@@ -205,12 +204,6 @@ class ErnieProcessor(BaseDataProcessor):
            response_dict.outputs.reasoning_content = reasoning_content
        else:
            response_dict.outputs.text = full_text
-        if self.tool_parser_obj:
-            tool_parser = self.tool_parser_obj(self.tokenizer)
-            tool_call_info = tool_parser.extract_tool_calls(full_text, response_dict)
-            if tool_call_info.tools_called:
-                response_dict.outputs.tool_calls = tool_call_info.tool_calls
-                response_dict.outputs.text = tool_call_info.content
        data_processor_logger.info(f"req_id:{req_id}, token)ids: {token_ids}")
        if response_dict.outputs.text == "" and response_dict.outputs.reasoning_content == "":
            return None
@@ -251,20 +244,12 @@ class ErnieProcessor(BaseDataProcessor):
        delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
        if is_end:
            full_text = previous_texts + delta_text
-            if self.reasoning_parser and (
-                enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
-            ):
+            if enable_thinking and self.reasoning_parser:
                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
                response_dict["outputs"]["text"] = text
                response_dict["outputs"]["reasoning_content"] = reasoning_content
            else:
                response_dict["outputs"]["text"] = full_text
-            if self.tool_parser_obj:
-                tool_parser = self.tool_parser_obj(self.tokenizer)
-                tool_call_info = tool_parser.extract_tool_calls(full_text, response_dict)
-                if tool_call_info.tools_called:
-                    response_dict["outputs"]["tool_call"] = tool_call_info.tool_calls
-                    response_dict["outputs"]["text"] = tool_call_info.content
            response_dict["outputs"]["raw_prediction"] = full_text
            data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}")
            del self.decode_status[req_id]
@@ -289,9 +274,8 @@ class ErnieProcessor(BaseDataProcessor):
            if token_ids[-1] == self.tokenizer.eos_token_id:
                token_ids = token_ids[:-1]
        delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
-        if self.reasoning_parser and (
-            enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
-        ):
+        response_dict["outputs"]["raw_prediction"] = delta_text
+        if enable_thinking and self.reasoning_parser:
            reasoning_content, text = self.reasoning_parser.extract_reasoning_content_streaming(
                previous_texts,
                previous_texts + delta_text,
@@ -304,25 +288,10 @@ class ErnieProcessor(BaseDataProcessor):
            response_dict["outputs"]["reasoning_content"] = reasoning_content
        else:
            response_dict["outputs"]["text"] = delta_text
-        if self.tool_parser_obj:
-            if req_id not in self.tool_parsers:
-                self.tool_parsers[req_id] = self.tool_parser_obj(self.tokenizer)
-            tool_parser = self.tool_parsers[req_id]
-            tool_call = tool_parser.extract_tool_calls_streaming(
-                previous_texts,
-                previous_texts + delta_text,
-                delta_text,
-                previous_token_ids,
-                previous_token_ids + token_ids,
-                token_ids,
-                response_dict,
-            )
-            response_dict["outputs"]["tool_delta_message"] = tool_call
+        response_dict["outputs"]["raw_prediction"] = delta_text
        if is_end:
            data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}")
            del self.decode_status[req_id]
-            if req_id in self.tool_parsers:
-                del self.tool_parsers[req_id]
        return response_dict

    def messages2ids(self, request_or_messages):