Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
support build cpu images (#341)
@@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17)
 project(trironpaddlebackend LANGUAGES C CXX)
 
+option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
 set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by sparating them with a semicolon.")
 set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include"
     CACHE PATH "Paths to FastDeploy includes. Multiple paths may be specified by sparating them with a semicolon.")
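The new TRITON_ENABLE_GPU option defaults to ON, so a CPU-only build has to switch it off at configure time. A minimal configure sketch, assuming a FastDeploy install tree at build/fastdeploy-0.0.3 (these are the same flags the CPU branch of build_fd_backend.sh passes later in this commit):

    # Hedged sketch: configure the Triton backend without CUDA support.
    cmake .. \
        -DTRITON_ENABLE_GPU=OFF \
        -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 \
        -DTRITON_COMMON_REPO_TAG=r22.09 \
        -DTRITON_CORE_REPO_TAG=r22.09 \
        -DTRITON_BACKEND_REPO_TAG=r22.09
    make -j`nproc`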
@@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/
 set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
 set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
 
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
 include(FetchContent)
 
 FetchContent_Declare(
@@ -61,6 +66,13 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(repo-common repo-core repo-backend)
 
+#
+# CUDA
+#
+if(${TRITON_ENABLE_GPU})
+  find_package(CUDAToolkit REQUIRED)
+endif()  # TRITON_ENABLE_GPU
+
 configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY)
 
 add_library(
@@ -73,11 +85,7 @@ target_include_directories(
   triton-fastdeploy-backend
   PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/src
-)
-
-target_include_directories(
-  triton-fastdeploy-backend
-  PRIVATE ${FASTDEPLOY_INCLUDE_PATHS}
+    ${FASTDEPLOY_INCLUDE_PATHS}
 )
 
 target_link_libraries(
@@ -92,6 +100,13 @@ target_compile_options(
     -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
 )
 
+if(${TRITON_ENABLE_GPU})
+  target_compile_definitions(
+    triton-fastdeploy-backend
+    PRIVATE TRITON_ENABLE_GPU=1
+  )
+endif()  # TRITON_ENABLE_GPU
+
 set_target_properties(
   triton-fastdeploy-backend PROPERTIES
     POSITION_INDEPENDENT_CODE ON
@@ -107,3 +122,11 @@ target_link_libraries(
     triton-backend-utils    # from repo-backend
     triton-core-serverstub  # from repo-core
 )
+
+if(${TRITON_ENABLE_GPU})
+  target_link_libraries(
+    triton-fastdeploy-backend
+    PRIVATE
+      CUDA::cudart
+  )
+endif()  # TRITON_ENABLE_GPU
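With CUDA::cudart linked only under TRITON_ENABLE_GPU, a CPU build of the backend should carry no CUDA runtime dependency. A hedged way to verify, assuming the library was built under serving/build as the scripts below do:

    # A CPU-only libtriton_fastdeploy.so should print nothing here.
    ldd serving/build/libtriton_fastdeploy.so | grep -i cudart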
serving/Dockfile_cpu (new file, 32 lines)
@@ -0,0 +1,32 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM paddlepaddle/fastdeploy:22.09-cpu-only-min
+
+ENV TZ=Asia/Shanghai \
+    DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
+    && python3 -m pip install -U pip \
+    && python3 -m pip install paddlepaddle faster_tokenizer
+
+COPY python/dist/*.whl /opt/fastdeploy/
+RUN python3 -m pip install /opt/fastdeploy/*.whl \
+    && rm -rf /opt/fastdeploy/*.whl
+
+COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
+COPY build/fastdeploy-0.0.3 /opt/fastdeploy/
+
+RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
+ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
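A hedged sketch of building this image; the tag is illustrative, and the build context must be the repository root so the COPY paths (python/dist, serving/build, build/fastdeploy-0.0.3) resolve:

    # Illustrative tag; run from the repository root after the build scripts.
    docker build -f serving/Dockfile_cpu -t fastdeploy-serving:cpu .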
@@ -12,7 +12,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-sh build_fd_vison.sh
-sh build_fd_runtime.sh
-sh build_fd_backend.sh
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
+    sh build_fd_vison.sh ON
+    sh build_fd_runtime.sh ON
+    sh build_fd_backend.sh ON
+
+else
+
+    sh build_fd_vison.sh OFF
+    sh build_fd_runtime.sh OFF
+    sh build_fd_backend.sh OFF
+
+fi
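The dispatcher script (its file name is not shown in this hunk) now takes WITH_GPU as its first positional argument, defaulting to ON, and forwards it to the three sub-scripts. Usage sketch with a placeholder script name:

    sh build_all.sh        # hypothetical name; GPU build, the default
    sh build_all.sh OFF    # CPU-only build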
@@ -12,6 +12,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
+
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
     tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \
     apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
     export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
     cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
+else
+docker run -it --rm --name build_fd_backend \
+    -v`pwd`:/workspace/fastdeploy \
+    paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+    bash -c \
+    'cd /workspace/fastdeploy/serving;
+    rm -rf build; mkdir build; cd build;
+    apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
+    cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`'
+fi
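In the CPU branch, build_fd_backend.sh skips the CMake 3.18.6 download, builds inside the 22.09-cpu-only-buildbase image, and configures with -DTRITON_ENABLE_GPU=OFF and the r22.09 Triton tags (the GPU branch stays on r21.10). A hedged post-build check for the artifact the Dockerfile copies:

    ls -lh serving/build/libtriton_fastdeploy.so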
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
     tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \
     cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
     make -j`nproc`;
     make install'
+
+else
+
+docker run -it --rm --name build_fd_runtime \
+    -v`pwd`:/workspace/fastdeploy \
+    paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+    bash -c \
+    'cd /workspace/fastdeploy;
+    rm -rf build; mkdir build; cd build;
+    cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+    make -j`nproc`;
+    make install'
+
+fi
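The CPU branch of build_fd_runtime.sh drops -DENABLE_TRT_BACKEND and -DTRT_DIRECTORY, sets -DWITH_GPU=OFF, and installs into build/fastdeploy-0.0.3, the tree that both build_fd_backend.sh and the Dockerfile reference. A hedged check after this step:

    ls build/fastdeploy-0.0.3/lib build/fastdeploy-0.0.3/third_libs/install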
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
    tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \
     export ENABLE_TEXT=ON;
     python setup.py build;
     python setup.py bdist_wheel'
+
+else
+
+docker run -it --rm --name build_fd_vison \
+    -v`pwd`:/workspace/fastdeploy \
+    paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+    bash -c \
+    'cd /workspace/fastdeploy/python;
+    rm -rf .setuptools-cmake-build dist;
+    export WITH_GPU=OFF;
+    export ENABLE_VISION=ON;
+    export ENABLE_TEXT=ON;
+    python setup.py build;
+    python setup.py bdist_wheel'
+
+fi
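The CPU branch of build_fd_vison.sh exports WITH_GPU=OFF before building the Python wheel; the wheel lands in python/dist, which the Dockerfile installs via COPY python/dist/*.whl. A hedged check:

    ls python/dist/*.whl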
@@ -40,7 +40,8 @@ namespace triton {
 namespace backend {
 namespace fastdeploy_runtime {
 
-#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
+#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \
+    RESPONSES, RESPONSES_COUNT, BOOL, X) \
   do { \
     TRITONSERVER_Error* raasnie_err__ = (X); \
     if (raasnie_err__ != nullptr) { \
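The macro gains an FD_ prefix, presumably to avoid clashing with an identically named macro in the newer Triton backend headers that the r22.09 tags pull in; that motivation is my inference, not stated in the commit. A hedged way to look for such a clash, assuming FetchContent's default _deps layout under the build directory:

    grep -rn "RESPOND_ALL_AND_SET_TRUE_IF_ERROR" serving/build/_deps/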
@@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
       requests, request_count, &responses, model_state_->TritonMemoryManager(),
       model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0,
       HostPolicyName().c_str());
-  RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+  FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
       responses, request_count, all_response_failed,
       SetInputTensors(total_batch_size, requests, request_count, &responses,
                       &collector, &cuda_copy));
@@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
   SET_TIMESTAMP(compute_start_ns);
 
   if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
                                       all_response_failed,
                                       Run(&responses, request_count));
   }
@@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
   SET_TIMESTAMP(compute_end_ns);
 
   if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
         responses, request_count, all_response_failed,
         ReadOutputTensors(total_batch_size, requests, request_count,
                           &responses));
@@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors(
   // BackendOutputResponder responder(
   //     requests, request_count, responses,
   //     model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0,
-  //     model_state_->EnablePinnedInput(), CudaStream());
+  //     model_state_->EnablePinnedOutput(), CudaStream());
   // r21.10
   BackendOutputResponder responder(
       requests, request_count, responses, StateForModel()->MaxBatchSize(),