add tool parser

2025-10-04 16:22:57 +08:00 · 2025-08-13 01:06:55 +08:00
parent 132a8ef425
commit bbd50c6717
23 changed files with 1050 additions and 32 deletions
--- a/fastdeploy/entrypoints/openai/tool_parsers/utils.py
+++ b/fastdeploy/entrypoints/openai/tool_parsers/utils.py
@@ -0,0 +1,137 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import json
+from json import JSONDecodeError, JSONDecoder
+from typing import Any
+
+import partial_json_parser
+from partial_json_parser.core.options import Allow
+
+
+def find_common_prefix(s1: str, s2: str) -> str:
+    """
+    Finds a common prefix that is shared between two strings, if there is one.
+    Order of arguments is NOT important.
+
+    This function is provided as a UTILITY for extracting information from JSON
+    generated by partial_json_parser, to help in ensuring that the right tokens
+    are returned in streaming, so that close-quotes, close-brackets and
+    close-braces are not returned prematurely.
+
+    e.g. find_common_prefix('{"fruit": "ap"}', '{"fruit": "apple"}') ->
+    '{"fruit": "ap'
+    """
+    prefix = ""
+    min_length = min(len(s1), len(s2))
+    for i in range(0, min_length):
+        if s1[i] == s2[i]:
+            prefix += s1[i]
+        else:
+            break
+    return prefix
+
+
+def find_common_suffix(s1: str, s2: str) -> str:
+    """
+    Finds a common suffix shared between two strings, if there is one. Order of
+    arguments is NOT important.
+    Stops when the suffix ends OR it hits an alphanumeric character
+
+    e.g. find_common_suffix('{"fruit": "ap"}', '{"fruit": "apple"}') -> '"}'
+    """
+    suffix = ""
+    min_length = min(len(s1), len(s2))
+    for i in range(1, min_length + 1):
+        if s1[-i] == s2[-i] and not s1[-i].isalnum():
+            suffix = s1[-i] + suffix
+        else:
+            break
+    return suffix
+
+
+def extract_intermediate_diff(curr: str, old: str) -> str:
+    """
+    Given two strings, extract the difference in the middle between two strings
+    that are known to have a common prefix and/or suffix.
+
+    This function is provided as a UTILITY for extracting information from JSON
+    generated by partial_json_parser, to help in ensuring that the right tokens
+    are returned in streaming, so that close-quotes, close-brackets and
+    close-braces are not returned prematurely. The order of arguments IS
+    important - the new version of the partially-parsed JSON must be the first
+    argument, and the secnod argument must be from the previous generation.
+
+    What it returns, is tokens that should be streamed to the client.
+
+    e.g. extract_intermediate_diff('{"fruit": "apple"}', '{"fruit": "ap"}')
+        -> 'ple'
+
+    """
+    suffix = find_common_suffix(curr, old)
+
+    old = old[::-1].replace(suffix[::-1], "", 1)[::-1]
+    prefix = find_common_prefix(curr, old)
+    diff = curr
+    if len(suffix):
+        diff = diff[::-1].replace(suffix[::-1], "", 1)[::-1]
+
+    if len(prefix):
+        # replace the prefix only once in case it's mirrored
+        diff = diff.replace(prefix, "", 1)
+
+    return diff
+
+
+def find_all_indices(string: str, substring: str) -> list[int]:
+    """
+    Find all (starting) indices of a substring in a given string. Useful for
+    tool call extraction
+    """
+    indices = []
+    index = -1
+    while True:
+        index = string.find(substring, index + 1)
+        if index == -1:
+            break
+        indices.append(index)
+    return indices
+
+
+# partial_json_parser doesn't support extra data and
+# JSONDecoder.raw_decode doesn't support partial JSON
+def partial_json_loads(input_str: str, flags: Allow) -> tuple[Any, int]:
+    try:
+        return (partial_json_parser.loads(input_str, flags), len(input_str))
+    except JSONDecodeError as e:
+        if "Extra data" in e.msg:
+            dec = JSONDecoder()
+            return dec.raw_decode(input_str)
+        raise
+
+
+def is_complete_json(input_str: str) -> bool:
+    try:
+        json.loads(input_str)
+        return True
+    except JSONDecodeError:
+        return False
+
+
+def consume_space(i: int, s: str) -> int:
+    while i < len(s) and s[i].isspace():
+        i += 1
+    return i