mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Serving] Update trt backend to 8.5.2.2 (#1326)
* update trt backend * Add trt version args * Add cuda cudnn version
This commit is contained in:
@@ -23,7 +23,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
|
||||
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
|
||||
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python
|
||||
|
||||
COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5
|
||||
COPY serving/TensorRT-8.5.2.2 /opt/TensorRT-8.5.2.2
|
||||
|
||||
ENV TZ=Asia/Shanghai \
|
||||
DEBIAN_FRONTEND=noninteractive \
|
||||
@@ -55,5 +55,5 @@ RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddle
|
||||
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
|
||||
COPY build/fastdeploy_install /opt/fastdeploy/
|
||||
|
||||
ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
|
||||
ENV LD_LIBRARY_PATH="/opt/TensorRT-8.5.2.2/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
|
||||
ENV PATH="/opt/tritonserver/bin:$PATH"
|
||||
|
||||
59
serving/Dockerfile_CUDA_11_4_TRT_8_4
Normal file
59
serving/Dockerfile_CUDA_11_4_TRT_8_4
Normal file
@@ -0,0 +1,59 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# FastDeploy serving image variant pinned to TensorRT 8.4.1.5 (CUDA 11.4 /
# Triton 21.10 base). Build context is the FastDeploy repo root; the TensorRT
# tarball and the serving backend are prepared by serving/scripts/build.sh
# before this image is built.

# Optional build-time proxies: docker build --build-arg http_proxy=... .
ARG http_proxy
ARG https_proxy

# Stage "full" exists only to copy the Triton server binary, libraries and the
# python backend out of it; the final image is based on the smaller -min image.
FROM nvcr.io/nvidia/tritonserver:21.10-py3 AS full
FROM nvcr.io/nvidia/tritonserver:21.10-py3-min

# The Triton server binary ships renamed as "fastdeployserver".
COPY --from=full /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python

# Unpacked by serving/scripts/build.sh (-tv 8.4.1.5) prior to the build.
COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5

# FIX: https_proxy previously mirrored $http_proxy; forward the matching ARG
# (the ARG https_proxy above was declared but never used).
ENV TZ=Asia/Shanghai \
    DEBIAN_FRONTEND=noninteractive \
    DCGM_VERSION=2.2.9 \
    http_proxy=$http_proxy \
    https_proxy=$https_proxy

# Rotate the NVIDIA CUDA repo signing key (the old 7fa2af80 key was revoked)
# and install DCGM. The keyring .deb and the apt lists are removed in the same
# layer so they do not persist in the image.
RUN apt-get update \
    && apt-key del 7fa2af80 \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && rm -f cuda-keyring_1.0-1_all.deb \
    && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub \
    && apt-get update \
    && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9 \
    && rm -rf /var/lib/apt/lists/*

# Runtime libraries required by the Triton python backend (libre2, libb64,
# libarchive) and by vision preprocessing (ffmpeg, libsm6, libxext6), plus
# FastDeploy's Python-side dependencies.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
    && rm -rf /var/lib/apt/lists/* \
    && python3 -m pip install --no-cache-dir -U pip \
    && python3 -m pip install --no-cache-dir paddlenlp fast-tokenizer-python

# Install the FastDeploy Python wheel built earlier in the pipeline, then drop
# the wheel file in the same layer.
COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install --no-cache-dir /opt/fastdeploy/*.whl \
    && rm -rf /opt/fastdeploy/*.whl

# Unset the proxies so they do not leak into the runtime environment.
ENV http_proxy=
ENV https_proxy=
RUN python3 -m pip install --no-cache-dir paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html

# FastDeploy Triton backend plugin and the pre-built FastDeploy C++ runtime
# (both produced by serving/scripts/build.sh).
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy_install /opt/fastdeploy/

# Make TensorRT 8.4.1.5 and every bundled inference backend resolvable at
# runtime, and put fastdeployserver on PATH.
ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
ENV PATH="/opt/tritonserver/bin:$PATH"
|
||||
@@ -18,6 +18,16 @@ cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
|
||||
```
|
||||
|
||||
The default version of TensorRT is 8.5.2.2. If you need to change the version, you can run the following commands.
|
||||
|
||||
```
|
||||
cd serving
|
||||
bash scripts/build.sh -tv 8.4.1.5
|
||||
|
||||
cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
|
||||
```
|
||||
|
||||
For example, create a GPU image based on FastDeploy v1.0.3 in an Ubuntu 20.04, CUDA 11.2 environment
|
||||
```
|
||||
# Enter the serving directory and execute the script to compile the FastDeploy and serving backend
|
||||
|
||||
@@ -15,7 +15,17 @@ bash scripts/build.sh
|
||||
# 退出到FastDeploy主目录,制作镜像
|
||||
# x.y.z为FastDeploy版本号,可根据情况自己确定。比如: 1.0.3
|
||||
cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile .
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile .
|
||||
```
|
||||
|
||||
目前默认 TensorRT 版本为 8.5.2.2,如果需要切换 TensorRT 版本,则可执行以下编译命令:
|
||||
|
||||
```
|
||||
cd serving
|
||||
bash scripts/build.sh -tv 8.4.1.5
|
||||
|
||||
cd ../
|
||||
docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 .
|
||||
```
|
||||
|
||||
比如在ubuntu 20.04,cuda11.2环境下制作基于FastDeploy v1.0.3的GPU镜像
|
||||
|
||||
@@ -33,6 +33,9 @@ do
|
||||
-hs|--https_proxy)
|
||||
https_proxy="$2"
|
||||
shift;;
|
||||
-tv|--trt_version)
|
||||
trt_version="$2"
|
||||
shift;;
|
||||
--)
|
||||
shift
|
||||
break;;
|
||||
@@ -50,6 +53,20 @@ fi
|
||||
|
||||
if [ $WITH_GPU == "ON" ]; then
|
||||
|
||||
if [ -z $trt_version ]; then
|
||||
# The optional value of trt_version: ["8.4.1.5", "8.5.2.2"]
|
||||
trt_version="8.5.2.2"
|
||||
fi
|
||||
|
||||
if [ $trt_version == "8.5.2.2" ]
|
||||
then
|
||||
cuda_version="11.8"
|
||||
cudnn_version="8.6"
|
||||
else
|
||||
cuda_version="11.6"
|
||||
cudnn_version="8.4"
|
||||
fi
|
||||
|
||||
echo "start build FD GPU library"
|
||||
|
||||
if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
|
||||
@@ -58,10 +75,10 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
|
||||
rm -rf cmake-3.18.6-Linux-x86_64.tar.gz
|
||||
fi
|
||||
|
||||
if [ ! -d "./TensorRT-8.4.1.5/" ]; then
|
||||
wget https://fastdeploy.bj.bcebos.com/third_libs/TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
|
||||
tar -zxvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
|
||||
rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
|
||||
if [ ! -d "./TensorRT-${trt_version}/" ]; then
|
||||
wget https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
|
||||
tar -zxvf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
|
||||
rm -rf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz
|
||||
fi
|
||||
|
||||
nvidia-docker run -i --rm --name ${docker_name} \
|
||||
@@ -78,7 +95,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
|
||||
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
|
||||
export WITH_GPU=ON;
|
||||
export ENABLE_TRT_BACKEND=OFF;
|
||||
export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/;
|
||||
export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/;
|
||||
export ENABLE_ORT_BACKEND=OFF;
|
||||
export ENABLE_PADDLE_BACKEND=OFF;
|
||||
export ENABLE_OPENVINO_BACKEND=OFF;
|
||||
@@ -88,7 +105,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
|
||||
python setup.py bdist_wheel;
|
||||
cd /workspace/fastdeploy;
|
||||
rm -rf build; mkdir -p build;cd build;
|
||||
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
|
||||
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
|
||||
make -j`nproc`;
|
||||
make install;
|
||||
cd /workspace/fastdeploy/serving;
|
||||
|
||||
Reference in New Issue
Block a user