[Feature] Support include_stop_str_in_output in chat/completion (#2910)

* [Feature] Support include_stop_str_in_output in chat/completion
* Add CI test for include_stop_str_in_output
* Update version of openai
* Fix CI test

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
2025-10-05 08:37:06 +08:00 · 2025-07-18 16:59:18 +08:00
parent 6efad14b95
commit fbe3547c95
5 changed files with 82 additions and 16 deletions
--- a/fastdeploy/input/text_processor.py
+++ b/fastdeploy/input/text_processor.py
@@ -355,7 +355,7 @@ class DataProcessor(BaseDataProcessor):
        token_ids = response_dict["outputs"]["token_ids"]
        is_end = response_dict["finished"]
        req_id = response_dict["request_id"]
-        if is_end and len(token_ids) > 0:
+        if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"):
            if token_ids[-1] == self.tokenizer.eos_token_id:
                token_ids = token_ids[:-1]
        delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
@@ -390,7 +390,7 @@ class DataProcessor(BaseDataProcessor):
        req_id = response_dict["request_id"]
        token_ids = response_dict["outputs"]["token_ids"]

-        if is_end and len(token_ids) > 0:
+        if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"):
            if token_ids[-1] == self.tokenizer.eos_token_id:
                token_ids = token_ids[:-1]
        delta_text, previous_token_ids, previous_texts = self.ids2tokens(
@@ -430,7 +430,7 @@ class DataProcessor(BaseDataProcessor):
                response_dict, enable_thinking=enable_thinking, **kwargs)
        else:
            return self.process_response_dict_normal(
-                response_dict=response_dict, enable_thinking=enable_thinking)
+                response_dict=response_dict, enable_thinking=enable_thinking, **kwargs)

    def text2ids(self, text, max_model_len, raw_request=True):
        """