diff --git a/docs/get_started/installation/kunlunxin_xpu.md b/docs/get_started/installation/kunlunxin_xpu.md
index 39c1832ca..4950347ce 100644
--- a/docs/get_started/installation/kunlunxin_xpu.md
+++ b/docs/get_started/installation/kunlunxin_xpu.md
@@ -5,7 +5,7 @@
- OS: Linux
- Python: 3.10
- XPU Model: P800
-- XPU Driver Version: ≥ 5.0.21.10
+- XPU Driver Version: ≥ 5.0.21.26
- XPU Firmware Version: ≥ 1.31
Verified platform:
@@ -15,7 +15,7 @@ Verified platform:
- OS: CentOS release 7.6 (Final)
- Python: 3.10
- XPU Model: P800 (OAM Edition)
-- XPU Driver Version: 5.0.21.10
+- XPU Driver Version: 5.0.21.26
- XPU Firmware Version: 1.31
**Note:** Currently, only INTEL or Hygon CPU-based P800 (OAM Edition) servers have been verified. Other CPU types and P800 (PCIe Edition) servers have not been tested yet.
@@ -25,9 +25,9 @@ Verified platform:
```bash
mkdir Work
cd Work
-docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.0.3
+docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.1.0
docker run --name fastdeploy-xpu --net=host -itd --privileged -v $PWD:/Work -w /Work \
- ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.0.3 \
+ ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.1.0 \
/bin/bash
docker exec -it fastdeploy-xpu /bin/bash
```
@@ -37,7 +37,7 @@ docker exec -it fastdeploy-xpu /bin/bash
### Install PaddlePaddle
```bash
-python -m pip install paddlepaddle-xpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
+python -m pip install paddlepaddle-xpu==3.1.1 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
```
Alternatively, you can install the latest version of PaddlePaddle (Not recommended)
@@ -49,7 +49,7 @@ python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/
### Install FastDeploy (**Do NOT install via PyPI source**)
```bash
-python -m pip install fastdeploy-xpu==2.0.3 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-xpu-p800/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+python -m pip install fastdeploy-xpu==2.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-xpu-p800/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
```
Alternatively, you can install the latest version of FastDeploy (Not recommended)
@@ -63,7 +63,7 @@ python -m pip install --pre fastdeploy-xpu -i https://www.paddlepaddle.org.cn/pa
### Install PaddlePaddle
```bash
-python -m pip install paddlepaddle-xpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
+python -m pip install paddlepaddle-xpu==3.1.1 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
```
Alternatively, you can install the latest version of PaddlePaddle (Not recommended)
diff --git a/docs/usage/kunlunxin_xpu_deployment.md b/docs/usage/kunlunxin_xpu_deployment.md
index 4eb7c70f8..455152d59 100644
--- a/docs/usage/kunlunxin_xpu_deployment.md
+++ b/docs/usage/kunlunxin_xpu_deployment.md
@@ -5,8 +5,14 @@
|ERNIE-4.5-300B-A47B|32K|WINT4|4 (recommend)|export XPU_VISIBLE_DEVICES="0,1,2,3" or "4,5,6,7"
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle \
--port 8188 \
--tensor-parallel-size 4 \
--max-model-len 32768 \
--max-num-seqs 64 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.0.0|
|ERNIE-4.5-300B-A47B|32K|WINT4|8|export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle \
--port 8188 \
--tensor-parallel-size 8 \
--max-model-len 32768 \
--max-num-seqs 64 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.0.0|
|ERNIE-4.5-300B-A47B|128K|WINT4|8 (recommend)|export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle \
--port 8188 \
--tensor-parallel-size 8 \
--max-model-len 131072 \
--max-num-seqs 64 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.0.0|
+|ERNIE-4.5-21B-A3B|32K|BF16|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|32K|WINT8|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|32K|WINT4|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|128K|BF16|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|128K|WINT8|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|128K|WINT4|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.1.0|
|ERNIE-4.5-0.3B|32K|BF16|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.0.3|
-|ERNIE-4.5-0.3B|32K|WINT8|1|export XPU_VISIBLE_DEVICES="x" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.0.3|
+|ERNIE-4.5-0.3B|32K|WINT8|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.0.3|
|ERNIE-4.5-0.3B|128K|BF16|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.0.3|
|ERNIE-4.5-0.3B|128K|WINT8|1|export XPU_VISIBLE_DEVICES="0" # Specify any card
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.0.3|
diff --git a/docs/zh/get_started/installation/kunlunxin_xpu.md b/docs/zh/get_started/installation/kunlunxin_xpu.md
index 2e77dac4c..e01d7c0f7 100644
--- a/docs/zh/get_started/installation/kunlunxin_xpu.md
+++ b/docs/zh/get_started/installation/kunlunxin_xpu.md
@@ -5,7 +5,7 @@
- OS:Linux
- Python:3.10
- XPU 型号:P800
-- XPU 驱动版本:≥ 5.0.21.10
+- XPU 驱动版本:≥ 5.0.21.26
- XPU 固件版本:≥ 1.31
已验证的平台:
@@ -15,7 +15,7 @@
- OS:CentOS release 7.6 (Final)
- Python:3.10
- XPU 型号:P800(OAM 版)
-- XPU 驱动版本:5.0.21.10
+- XPU 驱动版本:5.0.21.26
- XPU 固件版本:1.31
**注:** 目前只验证过 INTEL 或海光 CPU OAM 版 P800 服务器,暂未验证其它 CPU 和 PCIe 版 P800 服务器。
@@ -25,9 +25,9 @@
```bash
mkdir Work
cd Work
-docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.0.3
+docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.1.0
docker run --name fastdeploy-xpu --net=host -itd --privileged -v $PWD:/Work -w /Work \
- ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.0.3 \
+ ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.1.0 \
/bin/bash
docker exec -it fastdeploy-xpu /bin/bash
```
@@ -37,7 +37,7 @@ docker exec -it fastdeploy-xpu /bin/bash
### 安装 PaddlePaddle
```bash
-python -m pip install paddlepaddle-xpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
+python -m pip install paddlepaddle-xpu==3.1.1-i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
```
或者您也可以安装最新版 PaddlePaddle(不推荐)
@@ -49,7 +49,7 @@ python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/
### 安装 FastDeploy(**注意不要通过 pypi 源安装**)
```bash
-python -m pip install fastdeploy-xpu==2.0.3 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-xpu-p800/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+python -m pip install fastdeploy-xpu==2.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-xpu-p800/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
```
或者你也可以安装最新版 FastDeploy(不推荐)
@@ -63,7 +63,7 @@ python -m pip install --pre fastdeploy-xpu -i https://www.paddlepaddle.org.cn/pa
### 安装 PaddlePaddle
```bash
-python -m pip install paddlepaddle-xpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
+python -m pip install paddlepaddle-xpu==3.1.1-i https://www.paddlepaddle.org.cn/packages/stable/xpu-p800/
```
或者您也可以安装最新版 PaddlePaddle(不推荐)
diff --git a/docs/zh/usage/kunlunxin_xpu_deployment.md b/docs/zh/usage/kunlunxin_xpu_deployment.md
index fa4501f5c..aabfd1492 100644
--- a/docs/zh/usage/kunlunxin_xpu_deployment.md
+++ b/docs/zh/usage/kunlunxin_xpu_deployment.md
@@ -5,6 +5,12 @@
|ERNIE-4.5-300B-A47B|32K|WINT4|4 (推荐)|export XPU_VISIBLE_DEVICES="0,1,2,3" or "4,5,6,7"
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle \
--port 8188 \
--tensor-parallel-size 4 \
--max-model-len 32768 \
--max-num-seqs 64 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.0.0|
|ERNIE-4.5-300B-A47B|32K|WINT4|8|export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle \
--port 8188 \
--tensor-parallel-size 8 \
--max-model-len 32768 \
--max-num-seqs 64 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.0.0|
|ERNIE-4.5-300B-A47B|128K|WINT4|8 (推荐)|export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-300B-A47B-Paddle \
--port 8188 \
--tensor-parallel-size 8 \
--max-model-len 131072 \
--max-num-seqs 64 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.0.0|
+|ERNIE-4.5-21B-A3B|32K|BF16|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|32K|WINT8|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|32K|WINT4|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|128K|BF16|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|128K|WINT8|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.1.0|
+|ERNIE-4.5-21B-A3B|128K|WINT4|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-21B-A3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--quantization "wint4" \
--gpu-memory-utilization 0.9|>=2.1.0|
|ERNIE-4.5-0.3B|32K|BF16|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.0.3|
|ERNIE-4.5-0.3B|32K|WINT8|1|export XPU_VISIBLE_DEVICES="x" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 32768 \
--max-num-seqs 128 \
--quantization "wint8" \
--gpu-memory-utilization 0.9|>=2.0.3|
|ERNIE-4.5-0.3B|128K|BF16|1|export XPU_VISIBLE_DEVICES="0" # 指定任意一张卡
python -m fastdeploy.entrypoints.openai.api_server \
--model PaddlePaddle/ERNIE-4.5-0.3B-Paddle \
--port 8188 \
--tensor-parallel-size 1 \
--max-model-len 131072 \
--max-num-seqs 128 \
--gpu-memory-utilization 0.9|>=2.0.3|