mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Serving] Update trt backend to 8.5.2.2 (#1326)
* update trt backend * Add trt version args * Add cuda cudnn version
This commit is contained in:
@@ -23,7 +23,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
|
||||
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
|
||||
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python
|
||||
|
||||
COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5
|
||||
COPY serving/TensorRT-8.5.2.2 /opt/TensorRT-8.5.2.2
|
||||
|
||||
ENV TZ=Asia/Shanghai \
|
||||
DEBIAN_FRONTEND=noninteractive \
|
||||
@@ -55,5 +55,5 @@ RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddle
|
||||
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
|
||||
COPY build/fastdeploy_install /opt/fastdeploy/
|
||||
|
||||
ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
|
||||
ENV LD_LIBRARY_PATH="/opt/TensorRT-8.5.2.2/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
|
||||
ENV PATH="/opt/tritonserver/bin:$PATH"
|
||||
|
||||
59
serving/Dockerfile_CUDA_11_4_TRT_8_4
Normal file
59
serving/Dockerfile_CUDA_11_4_TRT_8_4
Normal file
@@ -0,0 +1,59 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# FastDeploy serving image variant pinned to TensorRT 8.4.1.5 (CUDA 11.4 /
# Triton 21.10 base). Build context is the FastDeploy repo root; the TensorRT
# tarball and the serving backend are prepared by serving/scripts/build.sh
# before this image is built.

# Optional build-time proxies: docker build --build-arg http_proxy=... .
ARG http_proxy
ARG https_proxy

# Stage "full" exists only to copy the Triton server binary, libraries and the
# python backend out of it; the final image is based on the smaller -min image.
FROM nvcr.io/nvidia/tritonserver:21.10-py3 AS full
FROM nvcr.io/nvidia/tritonserver:21.10-py3-min

# The Triton server binary ships renamed as "fastdeployserver".
COPY --from=full /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python

# Unpacked by serving/scripts/build.sh (-tv 8.4.1.5) prior to the build.
COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5

# FIX: https_proxy previously mirrored $http_proxy; forward the matching ARG
# (the ARG https_proxy above was declared but never used).
ENV TZ=Asia/Shanghai \
    DEBIAN_FRONTEND=noninteractive \
    DCGM_VERSION=2.2.9 \
    http_proxy=$http_proxy \
    https_proxy=$https_proxy

# Rotate the NVIDIA CUDA repo signing key (the old 7fa2af80 key was revoked)
# and install DCGM. The keyring .deb and the apt lists are removed in the same
# layer so they do not persist in the image.
RUN apt-get update \
    && apt-key del 7fa2af80 \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && rm -f cuda-keyring_1.0-1_all.deb \
    && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub \
    && apt-get update \
    && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9 \
    && rm -rf /var/lib/apt/lists/*

# Runtime libraries required by the Triton python backend (libre2, libb64,
# libarchive) and by vision preprocessing (ffmpeg, libsm6, libxext6), plus
# FastDeploy's Python-side dependencies.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
    && rm -rf /var/lib/apt/lists/* \
    && python3 -m pip install --no-cache-dir -U pip \
    && python3 -m pip install --no-cache-dir paddlenlp fast-tokenizer-python

# Install the FastDeploy Python wheel built earlier in the pipeline, then drop
# the wheel file in the same layer.
COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install --no-cache-dir /opt/fastdeploy/*.whl \
    && rm -rf /opt/fastdeploy/*.whl

# Unset the proxies so they do not leak into the runtime environment.
ENV http_proxy=
ENV https_proxy=
RUN python3 -m pip install --no-cache-dir paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html

# FastDeploy Triton backend plugin and the pre-built FastDeploy C++ runtime
# (both produced by serving/scripts/build.sh).
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy_install /opt/fastdeploy/

# Make TensorRT 8.4.1.5 and every bundled inference backend resolvable at
# runtime, and put fastdeployserver on PATH.
ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
ENV PATH="/opt/tritonserver/bin:$PATH"
|
||||
@@ -18,6 +18,16 @@ cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
|
||||
```
|
||||
|
||||
The default version of TensorRT is 8.5.2.2. If you need to change the version, you can run the following commands.
|
||||
|
||||
```
|
||||
cd serving
|
||||
bash scripts/build.sh -tv 8.4.1.5
|
||||
|
||||
cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
|
||||
```
|
||||
|
||||
For example, create a GPU image based on FastDeploy v1.0.3 in an Ubuntu 20.04, CUDA 11.2 environment
|
||||
```
|
||||
# Enter the serving directory and execute the script to compile the FastDeploy and serving backend
|
||||
|
||||
@@ -15,7 +15,17 @@ bash scripts/build.sh
|
||||
# 退出到FastDeploy主目录,制作镜像
|
||||
# x.y.z为FastDeploy版本号,可根据情况自己确定。比如: 1.0.3
|
||||
cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
|
||||
```
|
||||
|
||||
目前默认 TensorRT 版本为 8.5.2.2,如果需要切换 TensorRT 版本,则可执行以下编译命令:
|
||||
|
||||
```
|
||||
cd serving
|
||||
bash scripts/build.sh -tv 8.4.1.5
|
||||
|
||||
cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
|
||||
```
|
||||
|
||||
比如在ubuntu 20.04,cuda11.2环境下制作基于FastDeploy v1.0.3的GPU镜像
|
||||
|
||||
@@ -33,6 +33,9 @@ do
|
||||
-hs|--https_proxy)
|
||||
https_proxy="$2"
|
||||
shift;;
|
||||
-tv|--trt_version)
|
||||
trt_version="$2"
|
||||
shift;;
|
||||
--)
|
||||
shift
|
||||
break;;
|
||||
@@ -50,6 +53,20 @@ fi
|
||||
|
||||
if [ $WITH_GPU == "ON" ]; then
|
||||
|
||||
if [ -z $trt_version ]; then
|
||||
# The optional value of trt_version: ["8.4.1.5", "8.5.2.2"]
|
||||
trt_version="8.5.2.2"
|
||||
fi
|
||||
|
||||
if [ $trt_version == "8.5.2.2" ]
|
||||
then
|
||||
cuda_version="11.8"
|
||||
cudnn_version="8.6"
|
||||
else
|
||||
cuda_version="11.6"
|
||||
cudnn_version="8.4"
|
||||
fi
|
||||
|
||||
echo "start build FD GPU library"
|
||||
|
||||
if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
|
||||
@@ -58,10 +75,10 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
|
||||
rm -rf cmake-3.18.6-Linux-x86_64.tar.gz
|
||||
fi
|
||||
|
||||
if [ ! -d "./TensorRT-8.4.1.5/" ]; then
|
||||
wget https://fastdeploy.bj.bcebos.com/third_libs/TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
|
||||
tar -zxvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
|
||||
rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
|
||||
if [ ! -d "./TensorRT-${trt_version}/" ]; then
|
||||
wget https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
|
||||
tar -zxvf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
|
||||
rm -rf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
|
||||
fi
|
||||
|
||||
nvidia-docker run -i --rm --name ${docker_name} \
|
||||
@@ -78,7 +95,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
|
||||
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
|
||||
export WITH_GPU=ON;
|
||||
export ENABLE_TRT_BACKEND=OFF;
|
||||
export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/;
|
||||
export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/;
|
||||
export ENABLE_ORT_BACKEND=OFF;
|
||||
export ENABLE_PADDLE_BACKEND=OFF;
|
||||
export ENABLE_OPENVINO_BACKEND=OFF;
|
||||
@@ -88,7 +105,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
|
||||
python setup.py bdist_wheel;
|
||||
cd /workspace/fastdeploy;
|
||||
rm -rf build; mkdir -p build;cd build;
|
||||
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
|
||||
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
|
||||
make -j`nproc`;
|
||||
make install;
|
||||
cd /workspace/fastdeploy/serving;
|
||||
|
||||
Reference in New Issue
Block a user