From b808c4958540a9c0af9b3e28660ac70bf6ab1774 Mon Sep 17 00:00:00 2001 From: yangjianfengo1 <125249383+yangjianfengo1@users.noreply.github.com> Date: Tue, 12 Aug 2025 11:20:45 +0800 Subject: [PATCH] =?UTF-8?q?[Doc]=20=E5=A2=9E=E5=8A=A0=E4=B8=AD=E8=8B=B1?= =?UTF-8?q?=E6=96=87=E5=88=87=E6=8D=A2=20(#3318)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 增加中英文切换 * 增加中英文切换 * 修改readme --- .github/workflows/gh-pages.yml | 2 +- README.md | 3 +- README_CN.md | 93 ++++++++++++++ .../ERNIE-4.5-0.3B-Paddle.md | 3 +- .../ERNIE-4.5-21B-A3B-Paddle.md | 3 +- .../ERNIE-4.5-300B-A47B-Paddle.md | 3 +- .../ERNIE-4.5-VL-28B-A3B-Paddle.md | 1 + .../ERNIE-4.5-VL-424B-A47B-Paddle.md | 1 + .../FAQ.md | 0 .../README.md | 0 .../ERNIE-4.5-0.3B-Paddle.md | 1 + .../ERNIE-4.5-21B-A3B-Paddle.md | 1 + .../ERNIE-4.5-300B-A47B-Paddle.md | 1 + .../ERNIE-4.5-VL-28B-A3B-Paddle.md | 1 + .../ERNIE-4.5-VL-424B-A47B-Paddle.md | 1 + .../FAQ.md | 0 .../README.md | 0 .../get_started/installation/iluvatar_gpu.md | 1 + mkdocs.yml | 113 ++++++++++++++++-- 19 files changed, 213 insertions(+), 15 deletions(-) create mode 100644 README_CN.md rename docs/{optimal_deployment => best_practices}/ERNIE-4.5-0.3B-Paddle.md (99%) rename docs/{optimal_deployment => best_practices}/ERNIE-4.5-21B-A3B-Paddle.md (99%) rename docs/{optimal_deployment => best_practices}/ERNIE-4.5-300B-A47B-Paddle.md (99%) rename docs/{optimal_deployment => best_practices}/ERNIE-4.5-VL-28B-A3B-Paddle.md (99%) rename docs/{optimal_deployment => best_practices}/ERNIE-4.5-VL-424B-A47B-Paddle.md (99%) rename docs/{optimal_deployment => best_practices}/FAQ.md (100%) rename docs/{optimal_deployment => best_practices}/README.md (100%) rename docs/zh/{optimal_deployment => best_practices}/ERNIE-4.5-0.3B-Paddle.md (99%) rename docs/zh/{optimal_deployment => best_practices}/ERNIE-4.5-21B-A3B-Paddle.md (99%) rename docs/zh/{optimal_deployment => best_practices}/ERNIE-4.5-300B-A47B-Paddle.md (99%) rename 
docs/zh/{optimal_deployment => best_practices}/ERNIE-4.5-VL-28B-A3B-Paddle.md (99%) rename docs/zh/{optimal_deployment => best_practices}/ERNIE-4.5-VL-424B-A47B-Paddle.md (99%) rename docs/zh/{optimal_deployment => best_practices}/FAQ.md (100%) rename docs/zh/{optimal_deployment => best_practices}/README.md (100%) diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index 17234b639..e9de057c2 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -15,7 +15,7 @@ jobs: - uses: actions/setup-python@v5 with: python-version: 3.x - - run: pip install mkdocs-material mkdocs-get-deps mkdocs-material-extensions mkdocs-multilang + - run: pip install mkdocs-material mkdocs-get-deps mkdocs-material-extensions mkdocs-multilang mkdocs-static-i18n - name: Deploy to GitHub Pages env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index 0e635bf46..936b8d2af 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +English | [简体中文](README_CN.md)
@@ -68,7 +69,7 @@ Learn how to use FastDeploy through our documentation: - [Offline Inference Development](./docs/offline_inference.md) - [Online Service Deployment](./docs/online_serving/README.md) - [Full Supported Models List](./docs/supported_models.md) -- [Optimal Deployment](./docs/optimal_deployment/README.md) +- [Best Practices](./docs/best_practices/README.md) ## Supported Models diff --git a/README_CN.md b/README_CN.md new file mode 100644 index 000000000..eb89ee12a --- /dev/null +++ b/README_CN.md @@ -0,0 +1,93 @@ +[English](README.md) | 简体中文 + + + + + +-------------------------------------------------------------------------------- +# FastDeploy 2.0:基于飞桨的大语言模型与视觉语言模型推理部署工具包 + +## 最新活动 + +**[2025-07] 《FastDeploy2.0推理部署实测》专题活动已上线!** 完成文心4.5系列开源模型的推理部署等任务,即可获得骨瓷马克杯等FastDeploy2.0官方周边及丰富奖金!🎁 欢迎大家体验反馈~ 📌[报名地址](https://www.wjx.top/vm/meSsp3L.aspx#) 📌[活动详情](https://github.com/PaddlePaddle/FastDeploy/discussions/2728) + +## 关于 + +**FastDeploy** 是基于飞桨(PaddlePaddle)的大语言模型(LLM)与视觉语言模型(VLM)推理部署工具包,提供**开箱即用的生产级部署方案**,核心技术特性包括: + +- 🚀 **负载均衡式PD分解**:工业级解决方案,支持上下文缓存与动态实例角色切换,在保障SLO达标和吞吐量的同时优化资源利用率 +- 🔄 **统一KV缓存传输**:轻量级高性能传输库,支持智能NVLink/RDMA选择 +- 🤝 **OpenAI API服务与vLLM兼容**:单命令部署,兼容[vLLM](https://github.com/vllm-project/vllm/)接口 +- 🧮 **全量化格式支持**:W8A16、W8A8、W4A16、W4A8、W2A16、FP8等 +- ⏩ **高级加速技术**:推测解码、多令牌预测(MTP)及分块预填充 +- 🖥️ **多硬件支持**:NVIDIA GPU、昆仑芯XPU、海光DCU、昇腾NPU、天数智芯GPU、燧原GCU、沐曦GPU等 + + +## 要求 + +- 操作系统: Linux +- Python: 3.10 ~ 3.12 + +## 安装 + +FastDeploy 支持在**英伟达(NVIDIA)GPU**、**昆仑芯(Kunlunxin)XPU**、**天数(Iluvatar)GPU**、**燧原(Enflame)GCU** 以及其他硬件上进行推理部署。详细安装说明如下: + +- [英伟达 GPU](./docs/zh/get_started/installation/nvidia_gpu.md) +- [昆仑芯 XPU](./docs/zh/get_started/installation/kunlunxin_xpu.md) +- [天数 CoreX](./docs/zh/get_started/installation/iluvatar_gpu.md) +- [燧原 S60](./docs/zh/get_started/installation/Enflame_gcu.md) + +**注意:** 我们正在积极拓展硬件支持范围。目前,包括昇腾(Ascend)NPU、海光(Hygon)DCU 和沐曦(MetaX)GPU 在内的其他硬件平台正在开发测试中。敬请关注更新! 
+ +## 入门指南 + +通过我们的文档了解如何使用 FastDeploy: +- [10分钟快速部署](./docs/zh/get_started/quick_start.md) +- [ERNIE-4.5 部署](./docs/zh/get_started/ernie-4.5.md) +- [ERNIE-4.5-VL 部署](./docs/zh/get_started/ernie-4.5-vl.md) +- [离线推理](./docs/zh/offline_inference.md) +- [在线服务](./docs/zh/online_serving/README.md) +- [模型支持列表](./docs/zh/supported_models.md) +- [最佳实践](./docs/zh/best_practices/README.md) + +## 支持模型列表 + +| Model | Data Type | PD Disaggregation | Chunked Prefill | Prefix Caching | MTP | CUDA Graph | Maximum Context Length | +|:--- | :------- | :---------- | :-------- | :-------- | :----- | :----- | :----- | +|ERNIE-4.5-300B-A47B | BF16/WINT4/WINT8/W4A8C8/WINT2/FP8 | ✅| ✅ | ✅|✅(WINT4)| WIP |128K | +|ERNIE-4.5-300B-A47B-Base| BF16/WINT4/WINT8 | ✅| ✅ | ✅|✅(WINT4)| WIP | 128K | +|ERNIE-4.5-VL-424B-A47B | BF16/WINT4/WINT8 | WIP | ✅ | WIP | ❌ | WIP |128K | +|ERNIE-4.5-VL-28B-A3B | BF16/WINT4/WINT8 | ❌ | ✅ | WIP | ❌ | WIP |128K | +|ERNIE-4.5-21B-A3B | BF16/WINT4/WINT8/FP8 | ❌ | ✅ | ✅ | WIP | ✅|128K | +|ERNIE-4.5-21B-A3B-Base | BF16/WINT4/WINT8/FP8 | ❌ | ✅ | ✅ | WIP | ✅|128K | +|ERNIE-4.5-0.3B | BF16/WINT8/FP8 | ❌ | ✅ | ✅ | ❌ | ✅| 128K | + +## 进阶用法 + +- [量化](./docs/zh/quantization/README.md) +- [分离式部署](./docs/zh/features/disaggregated.md) +- [投机解码](./docs/zh/features/speculative_decoding.md) +- [前缀缓存](./docs/zh/features/prefix_caching.md) +- [分块预填充](./docs/zh/features/chunked_prefill.md) + +## 致谢 + +FastDeploy 依据 [Apache-2.0 开源许可证](./LICENSE)
进行授权。在开发过程中,我们参考并借鉴了 [vLLM](https://github.com/vllm-project/vllm) 的部分代码,以保持接口兼容性,在此表示衷心感谢。 diff --git a/docs/optimal_deployment/ERNIE-4.5-0.3B-Paddle.md b/docs/best_practices/ERNIE-4.5-0.3B-Paddle.md similarity index 99% rename from docs/optimal_deployment/ERNIE-4.5-0.3B-Paddle.md rename to docs/best_practices/ERNIE-4.5-0.3B-Paddle.md index 66cbb8a16..890822c29 100644 --- a/docs/optimal_deployment/ERNIE-4.5-0.3B-Paddle.md +++ b/docs/best_practices/ERNIE-4.5-0.3B-Paddle.md @@ -2,7 +2,8 @@ ## Environmental Preparation ### 1.1 Hardware requirements The minimum number of GPUs required to deploy `ERNIE-4.5-0.3B` on the following hardware for each quantization is as follows: -| | WINT8 | WINT4 | FP8 | + +| | WINT8 | WINT4 | FP8 | |-----|-----|-----|-----| |H800 80GB| 1 | 1 | 1 | |A800 80GB| 1 | 1 | / | diff --git a/docs/optimal_deployment/ERNIE-4.5-21B-A3B-Paddle.md b/docs/best_practices/ERNIE-4.5-21B-A3B-Paddle.md similarity index 99% rename from docs/optimal_deployment/ERNIE-4.5-21B-A3B-Paddle.md rename to docs/best_practices/ERNIE-4.5-21B-A3B-Paddle.md index 50029db81..5754d6b0a 100644 --- a/docs/optimal_deployment/ERNIE-4.5-21B-A3B-Paddle.md +++ b/docs/best_practices/ERNIE-4.5-21B-A3B-Paddle.md @@ -2,7 +2,8 @@ ## Environmental Preparation ### 1.1 Hardware requirements The minimum number of GPUs required to deploy `ERNIE-4.5-21B-A3B` on the following hardware for each quantization is as follows: -| | WINT8 | WINT4 | FP8 | + +| | WINT8 | WINT4 | FP8 | |-----|-----|-----|-----| |H800 80GB| 1 | 1 | 1 | |A800 80GB| 1 | 1 | / | diff --git a/docs/optimal_deployment/ERNIE-4.5-300B-A47B-Paddle.md b/docs/best_practices/ERNIE-4.5-300B-A47B-Paddle.md similarity index 99% rename from docs/optimal_deployment/ERNIE-4.5-300B-A47B-Paddle.md rename to docs/best_practices/ERNIE-4.5-300B-A47B-Paddle.md index a7eb9499c..285e2e044 100644 --- a/docs/optimal_deployment/ERNIE-4.5-300B-A47B-Paddle.md +++ b/docs/best_practices/ERNIE-4.5-300B-A47B-Paddle.md @@ -2,7 +2,8 @@ ## Environmental 
Preparation ### 1.1 Hardware requirements The minimum number of GPUs required to deploy `ERNIE-4.5-300B-A47B` on the following hardware for each quantization is as follows: -| | WINT8 | WINT4 | FP8 | WINT2 | W4A8 | + +| | WINT8 | WINT4 | FP8 | WINT2 | W4A8 | |-----|-----|-----|-----|-----|-----| |H800 80GB| 8 | 4 | 8 | 2 | 4 | |A800 80GB| 8 | 4 | / | 2 | 4 | diff --git a/docs/optimal_deployment/ERNIE-4.5-VL-28B-A3B-Paddle.md b/docs/best_practices/ERNIE-4.5-VL-28B-A3B-Paddle.md similarity index 99% rename from docs/optimal_deployment/ERNIE-4.5-VL-28B-A3B-Paddle.md rename to docs/best_practices/ERNIE-4.5-VL-28B-A3B-Paddle.md index e79a7158c..d839049d2 100644 --- a/docs/optimal_deployment/ERNIE-4.5-VL-28B-A3B-Paddle.md +++ b/docs/best_practices/ERNIE-4.5-VL-28B-A3B-Paddle.md @@ -5,6 +5,7 @@ ### 1.1 Support Status The minimum number of cards required for deployment on the following hardware is as follows: + | Device [GPU Mem] | WINT4 | WINT8 | BFLOAT16 | |:----------:|:----------:|:------:| :------:| | A30 [24G] | 2 | 2 | 4 | diff --git a/docs/optimal_deployment/ERNIE-4.5-VL-424B-A47B-Paddle.md b/docs/best_practices/ERNIE-4.5-VL-424B-A47B-Paddle.md similarity index 99% rename from docs/optimal_deployment/ERNIE-4.5-VL-424B-A47B-Paddle.md rename to docs/best_practices/ERNIE-4.5-VL-424B-A47B-Paddle.md index 899ce425a..ea536ffb0 100644 --- a/docs/optimal_deployment/ERNIE-4.5-VL-424B-A47B-Paddle.md +++ b/docs/best_practices/ERNIE-4.5-VL-424B-A47B-Paddle.md @@ -4,6 +4,7 @@ ## 1. 
Environment Preparation ### 1.1 Support Status The minimum number of cards required for deployment on the following hardware is as follows: + | Device [GPU Mem] | WINT4 | WINT8 | BFLOAT16 | |:----------:|:----------:|:------:| :------:| | H20 [144G] | 8 | 8 | 8 | diff --git a/docs/optimal_deployment/FAQ.md b/docs/best_practices/FAQ.md similarity index 100% rename from docs/optimal_deployment/FAQ.md rename to docs/best_practices/FAQ.md diff --git a/docs/optimal_deployment/README.md b/docs/best_practices/README.md similarity index 100% rename from docs/optimal_deployment/README.md rename to docs/best_practices/README.md diff --git a/docs/zh/optimal_deployment/ERNIE-4.5-0.3B-Paddle.md b/docs/zh/best_practices/ERNIE-4.5-0.3B-Paddle.md similarity index 99% rename from docs/zh/optimal_deployment/ERNIE-4.5-0.3B-Paddle.md rename to docs/zh/best_practices/ERNIE-4.5-0.3B-Paddle.md index 4533a6fee..bdfdbb275 100644 --- a/docs/zh/optimal_deployment/ERNIE-4.5-0.3B-Paddle.md +++ b/docs/zh/best_practices/ERNIE-4.5-0.3B-Paddle.md @@ -2,6 +2,7 @@ ## 一、环境准备 ### 1.1 支持情况 ERNIE-4.5-0.3B 各量化精度,在下列硬件上部署所需要的最小卡数如下: + | | WINT8 | WINT4 | FP8 | |-----|-----|-----|-----| |H800 80GB| 1 | 1 | 1 | diff --git a/docs/zh/optimal_deployment/ERNIE-4.5-21B-A3B-Paddle.md b/docs/zh/best_practices/ERNIE-4.5-21B-A3B-Paddle.md similarity index 99% rename from docs/zh/optimal_deployment/ERNIE-4.5-21B-A3B-Paddle.md rename to docs/zh/best_practices/ERNIE-4.5-21B-A3B-Paddle.md index 9c975662f..8b494d890 100644 --- a/docs/zh/optimal_deployment/ERNIE-4.5-21B-A3B-Paddle.md +++ b/docs/zh/best_practices/ERNIE-4.5-21B-A3B-Paddle.md @@ -2,6 +2,7 @@ ## 一、环境准备 ### 1.1 支持情况 ERNIE-4.5-21B-A3B 各量化精度,在下列硬件上部署所需要的最小卡数如下: + | | WINT8 | WINT4 | FP8 | |-----|-----|-----|-----| |H800 80GB| 1 | 1 | 1 | diff --git a/docs/zh/optimal_deployment/ERNIE-4.5-300B-A47B-Paddle.md b/docs/zh/best_practices/ERNIE-4.5-300B-A47B-Paddle.md similarity index 99% rename from docs/zh/optimal_deployment/ERNIE-4.5-300B-A47B-Paddle.md rename to 
docs/zh/best_practices/ERNIE-4.5-300B-A47B-Paddle.md index e91d9b176..b265c75a1 100644 --- a/docs/zh/optimal_deployment/ERNIE-4.5-300B-A47B-Paddle.md +++ b/docs/zh/best_practices/ERNIE-4.5-300B-A47B-Paddle.md @@ -2,6 +2,7 @@ ## 一、环境准备 ### 1.1 支持情况 ERNIE-4.5-300B-A47B各量化精度,在下列硬件上部署所需要的最小卡数如下: + | | WINT8 | WINT4 | FP8 | WINT2 | W4A8 | |-----|-----|-----|-----|-----|-----| |H800 80GB| 8 | 4 | 8 | 2 | 4 | diff --git a/docs/zh/optimal_deployment/ERNIE-4.5-VL-28B-A3B-Paddle.md b/docs/zh/best_practices/ERNIE-4.5-VL-28B-A3B-Paddle.md similarity index 99% rename from docs/zh/optimal_deployment/ERNIE-4.5-VL-28B-A3B-Paddle.md rename to docs/zh/best_practices/ERNIE-4.5-VL-28B-A3B-Paddle.md index 5888fd6d7..f5b18de53 100644 --- a/docs/zh/optimal_deployment/ERNIE-4.5-VL-28B-A3B-Paddle.md +++ b/docs/zh/best_practices/ERNIE-4.5-VL-28B-A3B-Paddle.md @@ -4,6 +4,7 @@ ## 一、环境准备 ### 1.1 支持情况 在下列硬件上部署所需要的最小卡数如下: + | 设备[显存] | WINT4 | WINT8 | BFLOAT16 | |:----------:|:----------:|:------:| :------:| | A30 [24G] | 2 | 2 | 4 | diff --git a/docs/zh/optimal_deployment/ERNIE-4.5-VL-424B-A47B-Paddle.md b/docs/zh/best_practices/ERNIE-4.5-VL-424B-A47B-Paddle.md similarity index 99% rename from docs/zh/optimal_deployment/ERNIE-4.5-VL-424B-A47B-Paddle.md rename to docs/zh/best_practices/ERNIE-4.5-VL-424B-A47B-Paddle.md index 032e7d37a..fafaefa7d 100644 --- a/docs/zh/optimal_deployment/ERNIE-4.5-VL-424B-A47B-Paddle.md +++ b/docs/zh/best_practices/ERNIE-4.5-VL-424B-A47B-Paddle.md @@ -4,6 +4,7 @@ ## 一、环境准备 ### 1.1 支持情况 在下列硬件上部署所需要的最小卡数如下: + | 设备[显存] | WINT4 | WINT8 | BFLOAT16 | |:----------:|:----------:|:------:| :------:| | H20 [144G] | 8 | 8 | 8 | diff --git a/docs/zh/optimal_deployment/FAQ.md b/docs/zh/best_practices/FAQ.md similarity index 100% rename from docs/zh/optimal_deployment/FAQ.md rename to docs/zh/best_practices/FAQ.md diff --git a/docs/zh/optimal_deployment/README.md b/docs/zh/best_practices/README.md similarity index 100% rename from docs/zh/optimal_deployment/README.md rename to 
docs/zh/best_practices/README.md diff --git a/docs/zh/get_started/installation/iluvatar_gpu.md b/docs/zh/get_started/installation/iluvatar_gpu.md index f1ab2b38d..c01ffe93a 100644 --- a/docs/zh/get_started/installation/iluvatar_gpu.md +++ b/docs/zh/get_started/installation/iluvatar_gpu.md @@ -3,6 +3,7 @@ ## 准备机器 首先您需要准备以下配置的机器 + | CPU | 内存 | 天数 | 硬盘| |-----|------|-----|-----| | x86 | 1TB| 8xBI150| 1TB| diff --git a/mkdocs.yml b/mkdocs.yml index 9ab270d1e..31bfc07d1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,13 +1,102 @@ site_name: 'FastDeploy 2.0: Large Language Model Deployement' +repo_url: https://github.com/PaddlePaddle/FastDeploy +repo_name: FastDeploy + +theme: + name: material + highlightjs: true + icon: + repo: fontawesome/brands/github + palette: + - media: "(prefers-color-scheme: light)" # 浅色 + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" # 深色 + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + +plugins: + - search + - i18n: + docs_structure: folder + fallback_to_default: true + reconfigure_material: true + reconfigure_search: true + languages: + - locale: en + default: true + name: English + site_name: 'FastDeploy 2.0: Large Language Model Deployment' + build: true + - locale: zh + name: 简体中文 + site_name: 飞桨大语言模型推理部署工具包 + link: /zh/ + nav_translations: + FastDeploy 2.0: FastDeploy 2.0 + Quick Start: 快速入门 + Installation: 安装 + Nvidia GPU: 英伟达 GPU + KunlunXin XPU: 昆仑芯 XPU + HYGON DCU: 海光 DCU + Enflame S60: 燧原 S60 + Iluvatar CoreX: 天数 CoreX + Quick Deployment For ERNIE-4.5-0.3B: ERNIE-4.5-0.3B快速部署 + Quick Deployment for ERNIE-4.5-VL-28B-A3B: ERNIE-4.5-VL-28B-A3B快速部署 + ERNIE-4.5-300B-A47B: ERNIE-4.5-300B-A47B快速部署 + ERNIE-4.5-VL-424B-A47B: ERNIE-4.5-VL-424B-A47B快速部署 + Online Serving: 在线服务 + OpenAI-Compitable API Server: 兼容 OpenAI 协议的服务化部署 + Monitor Metrics: 监控Metrics 
+ Scheduler: 调度器 + Offline Inference: 离线推理 + Best Practices: 最佳实践 + ERNIE-4.5-0.3B: ERNIE-4.5-0.3B + ERNIE-4.5-21B-A3B: ERNIE-4.5-21B-A3B + ERNIE-4.5-300B-A47B: ERNIE-4.5-300B-A47B + ERNIE-4.5-VL-28B-A3B: ERNIE-4.5-VL-28B-A3B + ERNIE-4.5-VL-424B-A47B: ERNIE-4.5-VL-424B-A47B + FAQ: 常见问题 + Quantization: 量化 + Overview: 概述 + Online Quantization: 在线量化 + WINT2 Quantization: WINT2量化 + Features: 特性 + Prefix Caching: 前缀缓存 + Disaggregation: 分离式部署 + Chunked Prefill: 分块预填充 + Load Balance: 负载均衡 + Speculative Decoding: 投机解码 + Structured Outputs: 结构化输出 + Reasoning Output: 思考链内容 + Early Stop: 早停功能 + Plugins: 插件机制 + Sampling: 采样策略 + Supported Models: 支持模型列表 + Benchmark: 基准测试 + Usage: 用法 + Log Description: 日志说明 + Code Overview: 代码概述 + Environment Variables: 环境变量 + nav: - 'FastDeploy 2.0': index.md - 'Quick Start': - Installation: - 'Nvidia GPU': get_started/installation/nvidia_gpu.md - 'KunlunXin XPU': get_started/installation/kunlunxin_xpu.md + - 'HYGON DCU': get_started/installation/hygon_dcu.md - 'Enflame S60': get_started/installation/Enflame_gcu.md - 'Iluvatar CoreX': get_started/installation/iluvatar_gpu.md - - 'Quick Deployment For ERNIE-4.5-0.3B-Paddle': get_started/quick_start.md + - 'Quick Deployment For ERNIE-4.5-0.3B': get_started/quick_start.md - 'Quick Deployment for ERNIE-4.5-VL-28B-A3B': get_started/quick_start_vl.md - 'ERNIE-4.5-300B-A47B': get_started/ernie-4.5.md - 'ERNIE-4.5-VL-424B-A47B': get_started/ernie-4.5-vl.md @@ -16,28 +105,32 @@ nav: - 'Monitor Metrics': online_serving/metrics.md - 'Scheduler': online_serving/scheduler.md - 'Offline Inference': offline_inference.md - - Quantiation: + - Best Practices: + - ERNIE-4.5-0.3B: best_practices/ERNIE-4.5-0.3B-Paddle.md + - ERNIE-4.5-21B-A3B: best_practices/ERNIE-4.5-21B-A3B-Paddle.md + - ERNIE-4.5-300B-A47B: best_practices/ERNIE-4.5-300B-A47B-Paddle.md + - ERNIE-4.5-VL-28B-A3B: best_practices/ERNIE-4.5-VL-28B-A3B-Paddle.md + - ERNIE-4.5-VL-424B-A47B: best_practices/ERNIE-4.5-VL-424B-A47B-Paddle.md + - 
FAQ: best_practices/FAQ.md + - Quantization: - 'Overview': quantization/README.md - 'Online Quantization': quantization/online_quantization.md - 'WINT2 Quantization': quantization/wint2.md - Features: - 'Prefix Caching': features/prefix_caching.md - - 'Disaggration': features/disaggregated.md + - 'Disaggregation': features/disaggregated.md - 'Chunked Prefill': features/chunked_prefill.md - 'Load Balance': features/load_balance.md - 'Speculative Decoding': features/speculative_decoding.md - 'Structured Outputs': features/structured_outputs.md - 'Reasoning Output': features/reasoning_output.md + - 'Early Stop': features/early_stop.md + - 'Plugins': features/plugins.md + - 'Sampling': features/sampling.md - 'Supported Models': supported_models.md - Benchmark: benchmark.md - Usage: - 'Log Description': usage/log.md - 'Code Overview': usage/code_overview.md - 'Environment Variables': usage/environment_variables.md -theme: - name: 'material' - highlightjs: true - icon: - repo: fontawesome/brands/github -repo_url: https://github.com/PaddlePaddle/FastDeploy -repo_name: FastDeploy +