diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md new file mode 100644 index 000000000..ff8706e31 --- /dev/null +++ b/docs/features/tool_calling.md @@ -0,0 +1,222 @@ +# Tool Calling + +This document describes how to configure the server in FastDeploy to use the tool parser, and how to invoke tools from the client. + +--- +## Quickstart + +### Starting FastDeploy with Tool Calling Enabled + +Launch the server with tool-calling enabled. This example uses ERNIE-4.5-21B-A3B. Leverage the ernie-x1 reasoning parser and the ernie-x1 tool-call parser from the fastdeploy directory to extract the model’s reasoning content, response content, and the tool-calling information: + +```bash +python -m fastdeploy.entrypoints.openai.api_server \ + --model /models/ERNIE-4.5-21B-A3B \ + --port 8000 \ + --reasoning-parser ernie-x1 \ + --tool-call-parser ernie-x1 +``` +### Example of triggering tool calling +Make a request containing the tool to trigger the model to use the available tool: +```bash +curl -X POST http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + { + "role": "user", + "content": "What is the weather in Beijing?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name, for example: Beijing" + }, + "unit": { + "type": "string", + "enum": ["c", "f"], + "description": "Temperature units: c = Celsius, f = Fahrenheit" + } + }, + "required": ["location", "unit"], + "additionalProperties": false + }, + "strict": true + } + } + ], + "stream": false + }' +``` +The example output is as follows. 
It shows that the model's thought process `reasoning_content` and tool call information `tool_calls` were successfully parsed, the current response content `content` is empty, and `finish_reason` is `tool_calls`: +```json +{ + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "", + "multimodal_content": null, + "reasoning_content": "User wants to ... ", + "tool_calls": [ + { + "id": "chatcmpl-tool-bc90641c67e44dbfb981a79bc986fbe5", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\": \"北京\", \"unit\": \"c\"}" + } + } + ] + }, + "finish_reason": "tool_calls" + } + ] +} +``` + +## Parallel Tool Calls +If the model can generate parallel tool calls, FastDeploy will return a list: +```python +tool_calls=[ + {"id": "...", "function": {...}}, + {"id": "...", "function": {...}} +] +``` + +## Requests containing tools in the conversation history +If tool-call information exists in previous turns, you can construct the request as follows: +```bash +curl -X POST "http://0.0.0.0:8000/v1/chat/completions" \ +-H "Content-Type: application/json" \ +-d '{ + "messages": [ + { + "role": "user", + "content": "Hello, what is the weather in Beijing?" + }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "get_weather", + "arguments": { + "location": "Beijing", + "unit": "c" + } + } + } + ], + "thoughts": "Users need to check today's weather in Beijing." + }, + { + "role": "tool", + "tool_call_id": "call_1", + "content": { + "type": "text", + "text": "{\"location\": \"北京\",\"temperature\": \"23\",\"weather\": \"晴\",\"unit\": \"c\"}" + } + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Determine weather in my location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state e.g. 
San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": [ + "c", + "f" + ] + } + }, + "additionalProperties": false, + "required": [ + "location", + "unit" + ] + }, + "strict": true + } + } + ], + "stream": false +}' +``` +The parsed model output is as follows, containing the thought content `reasoning_content` and the response content `content`, with `finish_reason` set to `stop`: +```json +{ + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Today's weather in Beijing is sunny with a temperature of 23 degrees Celsius.", + "reasoning_content": "User wants to ...", + "tool_calls": null + }, + "finish_reason": "stop" + } + ] +} +``` +## Writing a Custom Tool Parser +FastDeploy supports custom tool parser plugins. You can refer to the following address to create a `tool parser`: `fastdeploy/entrypoints/openai/tool_parser` + +A custom parser should implement: +```python +# import the required packages +# register the tool parser to ToolParserManager +@ToolParserManager.register_module("my-parser") +class ToolParser: + def __init__(self, tokenizer: AnyTokenizer): + super().__init__(tokenizer) + + # implement the tool parse for non-stream call + def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest) -> ExtractedToolCallInformation: + return ExtractedToolCallInformation(tools_called=False,tool_calls=[],content=model_output) + + # implement the tool call parse for stream call + def extract_tool_calls_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + request: ChatCompletionRequest, + ) -> DeltaMessage | None: + return delta +``` +Enable via: +```bash +python -m fastdeploy.entrypoints.openai.api_server \ +--model <model_path> \ +--tool-parser-plugin <tool_parser_plugin_path> \ +--tool-call-parser my-parser +``` + +--- diff --git a/docs/zh/features/tool_calling.md b/docs/zh/features/tool_calling.md new file mode 100644 
index 000000000..cbd8eb867 --- /dev/null +++ b/docs/zh/features/tool_calling.md @@ -0,0 +1,232 @@ +# Tool Calling + +本文档介绍如何在 FastDeploy 中配置服务器以使用工具解析器(tool parser),以及如何在客户端调用工具。 + +--- + +## 快速开始 + +### 启动包含解析器的FastDeploy + +使用包含思考解析器和工具解析器的命令启动服务器。下面的示例使用 ERNIE-4.5-21B-A3B。我们可以使用 fastdeploy 目录中的 ernie-x1 思考解析器(reasoning parser)和 ernie-x1 工具调用解析器(tool-call parser),从而实现解析模型的思考内容、回复内容以及工具调用信息: + +```bash +python -m fastdeploy.entrypoints.openai.api_server \ + --model /models/ERNIE-4.5-21B-A3B \ + --port 8000 \ + --reasoning-parser ernie-x1 \ + --tool-call-parser ernie-x1 +``` + +### 触发工具调用示例 + +构造一个包含工具的请求以触发模型调用工具: + +```bash +curl -X POST http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + { + "role": "user", + "content": "北京今天天气怎么样?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "获取指定地点的当前天气", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "城市名,如:北京。" + }, + "unit": { + "type": "string", + "enum": ["c", "f"], + "description": "温度单位:c = 摄氏度,f = 华氏度" + } + }, + "required": ["location", "unit"], + "additionalProperties": false + }, + "strict": true + } + } + ] + }' +``` + +示例输出如下,可以看到成功解析出了模型输出的思考内容`reasoning_content`以及工具调用信息`tool_calls`,且当前的回复内容`content`为空,`finish_reason`为工具调用`tool_calls`: +```json +{ + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "", + "multimodal_content": null, + "reasoning_content": "User wants to ... 
", + "tool_calls": [ + { + "id": "chatcmpl-tool-bc90641c67e44dbfb981a79bc986fbe5", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"location\": \"北京\", \"unit\": \"c\"}" + } + } + ] + }, + "finish_reason": "tool_calls" + } + ] +} +``` +## 并行工具调用 + +如果模型能够生成多个并行的工具调用,FastDeploy 会返回一个列表: + +```python +tool_calls=[ + {"id": "...", "function": {...}}, + {"id": "...", "function": {...}} +] +``` + +## 工具调用结果出现在历史会话中 + +如果前几轮对话中包含工具调用,可以按以下方式构造请求: + +```bash +curl -X POST "http://0.0.0.0:8000/v1/chat/completions" \ +-H "Content-Type: application/json" \ +-d '{ + "messages": [ + { + "role": "user", + "content": "你好,北京天气怎么样?" + }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "get_weather", + "arguments": { + "location": "北京", + "unit": "c" + } + } + } + ], + "thoughts": "用户需要查询北京今天的天气。" + }, + { + "role": "tool", + "tool_call_id": "call_1", + "content": { + "type": "text", + "text": "{\"location\": \"北京\",\"temperature\": \"23\",\"weather\": \"晴\",\"unit\": \"c\"}" + } + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "获取指定位置的当前天气。", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "城市名称,例如:北京" + }, + "unit": { + "type": "string", + "enum": [ + "c", + "f" + ], + "description": "温度单位:c = 摄氏度,f = 华氏度" + } + }, + "additionalProperties": false, + "required": [ + "location", + "unit" + ] + }, + "strict": true + } + } + ] +}' +``` +解析出的模型输出结果如下,包含思考内容`reasoning_content`与回复内容`content`,且`finish_reason`为`stop`: +```json +{ + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "北京今天的天气是晴天,气温为23摄氏度。", + "reasoning_content": "用户想...", + "tool_calls": null + }, + "finish_reason": "stop" + } + ] +} +``` +## 编写自定义工具解析器 +FastDeploy支持自定义工具解析器插件,可以参考以下地址中的`tool parser`创建:`fastdeploy/entrypoints/openai/tool_parser` + +自定义解析器需要实现: + +```python +# 
import the required packages +# register the tool parser to ToolParserManager +@ToolParserManager.register_module("my-parser") +class ToolParser: + def __init__(self, tokenizer: AnyTokenizer): + super().__init__(tokenizer) + + # implement the tool parse for non-stream call + def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest) -> ExtractedToolCallInformation: + return ExtractedToolCallInformation(tools_called=False,tool_calls=[],content=model_output) + + # implement the tool call parse for stream call + def extract_tool_calls_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + request: ChatCompletionRequest, + ) -> DeltaMessage | None: + return delta +``` + +通过以下方式启用自定义解析器: + +```bash +python -m fastdeploy.entrypoints.openai.api_server \ +--model <模型地址> \ +--tool-parser-plugin <自定义工具解析器的地址> \ +--tool-call-parser my-parser +``` + +---