From 5328fbc861e5a84e1785aee2ecebcf052a27edd8 Mon Sep 17 00:00:00 2001 From: heliqi <1101791222@qq.com> Date: Tue, 11 Oct 2022 01:17:27 -0500 Subject: [PATCH] support build cpu images (#341) --- serving/CMakeLists.txt | 33 ++++++++++++++++++++++---- serving/Dockfile_cpu | 32 +++++++++++++++++++++++++ serving/scripts/build.sh | 17 ++++++++++--- serving/scripts/build_fd_backend.sh | 15 ++++++++++++ serving/scripts/build_fd_runtime.sh | 18 ++++++++++++++ serving/scripts/build_fd_vison.sh | 20 ++++++++++++++++ serving/src/fastdeploy_backend_utils.h | 3 ++- serving/src/fastdeploy_runtime.cc | 8 +++---- 8 files changed, 133 insertions(+), 13 deletions(-) create mode 100644 serving/Dockfile_cpu diff --git a/serving/CMakeLists.txt b/serving/CMakeLists.txt index d74940234..96f3d6ca8 100644 --- a/serving/CMakeLists.txt +++ b/serving/CMakeLists.txt @@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17) project(trironpaddlebackend LANGUAGES C CXX) +option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON) set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by sparating them with a semicolon.") set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include" CACHE PATH "Paths to FastDeploy includes. 
Multiple paths may be specified by sparating them with a semicolon.") @@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/ set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo") +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + include(FetchContent) FetchContent_Declare( @@ -61,6 +66,13 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(repo-common repo-core repo-backend) +# +# CUDA +# +if(${TRITON_ENABLE_GPU}) + find_package(CUDAToolkit REQUIRED) +endif() # TRITON_ENABLE_GPU + configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY) add_library( @@ -73,11 +85,7 @@ target_include_directories( triton-fastdeploy-backend PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src -) - -target_include_directories( - triton-fastdeploy-backend - PRIVATE ${FASTDEPLOY_INCLUDE_PATHS} + ${FASTDEPLOY_INCLUDE_PATHS} ) target_link_libraries( @@ -92,6 +100,13 @@ target_compile_options( -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror> ) +if(${TRITON_ENABLE_GPU}) + target_compile_definitions( + triton-fastdeploy-backend + PRIVATE TRITON_ENABLE_GPU=1 + ) +endif() # TRITON_ENABLE_GPU + set_target_properties( triton-fastdeploy-backend PROPERTIES POSITION_INDEPENDENT_CODE ON @@ -107,3 +122,11 @@ target_link_libraries( triton-backend-utils # from repo-backend triton-core-serverstub # from repo-core ) + +if(${TRITON_ENABLE_GPU}) + target_link_libraries( + triton-fastdeploy-backend + PRIVATE + CUDA::cudart + ) +endif() # TRITON_ENABLE_GPU diff --git a/serving/Dockfile_cpu b/serving/Dockfile_cpu new file mode 100644 index 000000000..390272d30 --- /dev/null +++ b/serving/Dockfile_cpu @@ -0,0 +1,32 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM paddlepaddle/fastdeploy:22.09-cpu-only-min + +ENV TZ=Asia/Shanghai \ + DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 && rm -rf /var/lib/apt/lists/* \ + && python3 -m pip install -U pip \ + && python3 -m pip install --no-cache-dir paddlepaddle faster_tokenizer + +COPY python/dist/*.whl /opt/fastdeploy/ +RUN python3 -m pip install --no-cache-dir /opt/fastdeploy/*.whl \ + && rm -rf /opt/fastdeploy/*.whl + +COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/ +COPY build/fastdeploy-0.0.3 /opt/fastdeploy/ + +RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver +ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh index f03ed7c90..261a52fec 100644 --- a/serving/scripts/build.sh +++ b/serving/scripts/build.sh @@ -12,7 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+WITH_GPU=${1:-ON} -sh build_fd_vison.sh -sh build_fd_runtime.sh -sh build_fd_backend.sh +if [ "$WITH_GPU" = "ON" ]; then + +sh build_fd_vison.sh ON +sh build_fd_runtime.sh ON +sh build_fd_backend.sh ON + +else + +sh build_fd_vison.sh OFF +sh build_fd_runtime.sh OFF +sh build_fd_backend.sh OFF + +fi diff --git a/serving/scripts/build_fd_backend.sh b/serving/scripts/build_fd_backend.sh index 7eb639af1..5d402c5d9 100644 --- a/serving/scripts/build_fd_backend.sh +++ b/serving/scripts/build_fd_backend.sh @@ -12,6 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +WITH_GPU=${1:-ON} + +if [ "$WITH_GPU" = "ON" ]; then + if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz @@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \ apt-get update; apt-get install -y --no-install-recommends rapidjson-dev; export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH; cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`' +else +docker run -it --rm --name build_fd_backend \ + -v`pwd`:/workspace/fastdeploy \ + paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \ + bash -c \ + 'cd /workspace/fastdeploy/serving; + rm -rf build; mkdir build; cd build; + apt-get update; apt-get install -y --no-install-recommends rapidjson-dev; + cmake .. 
-DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`' +fi diff --git a/serving/scripts/build_fd_runtime.sh b/serving/scripts/build_fd_runtime.sh index 4f3df0b88..7525ca6a2 100644 --- a/serving/scripts/build_fd_runtime.sh +++ b/serving/scripts/build_fd_runtime.sh @@ -12,6 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +WITH_GPU=${1:-ON} + +if [ "$WITH_GPU" = "ON" ]; then + if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz @@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \ cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime; make -j`nproc`; make install' + +else + +docker run -it --rm --name build_fd_runtime \ + -v`pwd`:/workspace/fastdeploy \ + paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \ + bash -c \ + 'cd /workspace/fastdeploy; + rm -rf build; mkdir build; cd build; + cmake .. 
-DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime; + make -j`nproc`; + make install' + +fi diff --git a/serving/scripts/build_fd_vison.sh b/serving/scripts/build_fd_vison.sh index fd55d255f..e0beb6e7f 100644 --- a/serving/scripts/build_fd_vison.sh +++ b/serving/scripts/build_fd_vison.sh @@ -12,6 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +WITH_GPU=${1:-ON} + +if [ "$WITH_GPU" = "ON" ]; then + if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz @@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \ export ENABLE_TEXT=ON; python setup.py build; python setup.py bdist_wheel' + +else + +docker run -it --rm --name build_fd_vison \ + -v`pwd`:/workspace/fastdeploy \ + paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \ + bash -c \ + 'cd /workspace/fastdeploy/python; + rm -rf .setuptools-cmake-build dist; + export WITH_GPU=OFF; + export ENABLE_VISION=ON; + export ENABLE_TEXT=ON; + python setup.py build; + python setup.py bdist_wheel' + +fi diff --git a/serving/src/fastdeploy_backend_utils.h b/serving/src/fastdeploy_backend_utils.h index 46cc516ac..64119f9fa 100644 --- a/serving/src/fastdeploy_backend_utils.h +++ b/serving/src/fastdeploy_backend_utils.h @@ -40,7 +40,8 @@ namespace triton { namespace backend { namespace fastdeploy_runtime { -#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \ +#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \ +RESPONSES, RESPONSES_COUNT, BOOL, X) \ do { \ TRITONSERVER_Error* raasnie_err__ = 
(X); \ if (raasnie_err__ != nullptr) { \ diff --git a/serving/src/fastdeploy_runtime.cc b/serving/src/fastdeploy_runtime.cc index 1051915ef..b1ed8b6b0 100644 --- a/serving/src/fastdeploy_runtime.cc +++ b/serving/src/fastdeploy_runtime.cc @@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests, requests, request_count, &responses, model_state_->TritonMemoryManager(), model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0, HostPolicyName().c_str()); - RESPOND_ALL_AND_SET_TRUE_IF_ERROR( + FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( responses, request_count, all_response_failed, SetInputTensors(total_batch_size, requests, request_count, &responses, &collector, &cuda_copy)); @@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests, SET_TIMESTAMP(compute_start_ns); if (!all_response_failed) { - RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count, + FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count, all_response_failed, Run(&responses, request_count)); } @@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests, SET_TIMESTAMP(compute_end_ns); if (!all_response_failed) { - RESPOND_ALL_AND_SET_TRUE_IF_ERROR( + FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( responses, request_count, all_response_failed, ReadOutputTensors(total_batch_size, requests, request_count, &responses)); @@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors( // BackendOutputResponder responder( // requests, request_count, responses, // model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0, - // model_state_->EnablePinnedInput(), CudaStream()); + // model_state_->EnablePinnedOutput(), CudaStream()); // r21.10 BackendOutputResponder responder( requests, request_count, responses, StateForModel()->MaxBatchSize(),