From 5328fbc861e5a84e1785aee2ecebcf052a27edd8 Mon Sep 17 00:00:00 2001
From: heliqi <1101791222@qq.com>
Date: Tue, 11 Oct 2022 01:17:27 -0500
Subject: [PATCH] support build cpu images (#341)

---
 serving/CMakeLists.txt                 | 33 ++++++++++++++++++++++----
 serving/Dockfile_cpu                   | 32 +++++++++++++++++++++++++
 serving/scripts/build.sh               | 17 ++++++++++---
 serving/scripts/build_fd_backend.sh    | 15 ++++++++++++
 serving/scripts/build_fd_runtime.sh    | 18 ++++++++++++++
 serving/scripts/build_fd_vison.sh      | 20 ++++++++++++++++
 serving/src/fastdeploy_backend_utils.h |  3 ++-
 serving/src/fastdeploy_runtime.cc      |  8 +++----
 8 files changed, 133 insertions(+), 13 deletions(-)
 create mode 100644 serving/Dockfile_cpu

diff --git a/serving/CMakeLists.txt b/serving/CMakeLists.txt
index d74940234..96f3d6ca8 100644
--- a/serving/CMakeLists.txt
+++ b/serving/CMakeLists.txt
@@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17)
 
 project(trironpaddlebackend LANGUAGES C CXX)
 
+option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
 set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by sparating them with a semicolon.")
 set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include"
   CACHE PATH "Paths to FastDeploy includes. Multiple paths may be specified by sparating them with a semicolon.")
@@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/
 set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
 set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
 
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)  # single-config generators only
+  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type (defaults to Release)" FORCE)
+endif()
+
 include(FetchContent)
 
 FetchContent_Declare(
@@ -61,6 +66,13 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(repo-common repo-core repo-backend)
 
+#
+# CUDA (GPU builds only; CUDAToolkit supplies the CUDA::cudart target linked below)
+#
+if(TRITON_ENABLE_GPU)
+  find_package(CUDAToolkit REQUIRED)
+endif() # TRITON_ENABLE_GPU
+
 configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY)
 
 add_library(
@@ -73,11 +85,7 @@ target_include_directories(
   triton-fastdeploy-backend
   PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/src
-)
-
-target_include_directories(
-  triton-fastdeploy-backend
-  PRIVATE ${FASTDEPLOY_INCLUDE_PATHS}
+    ${FASTDEPLOY_INCLUDE_PATHS}
 )
 
 target_link_libraries(
@@ -92,6 +100,13 @@ target_compile_options(
     -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
 )
 
+if(TRITON_ENABLE_GPU)  # expose the CMake option to the backend sources as a macro
+  target_compile_definitions(
+    triton-fastdeploy-backend
+    PRIVATE TRITON_ENABLE_GPU=1
+  )
+endif() # TRITON_ENABLE_GPU
+
 set_target_properties(
   triton-fastdeploy-backend PROPERTIES
   POSITION_INDEPENDENT_CODE ON
@@ -107,3 +122,11 @@ target_link_libraries(
     triton-backend-utils    # from repo-backend
     triton-core-serverstub  # from repo-core
 )
+
+if(TRITON_ENABLE_GPU)  # the CUDA runtime is linked only for GPU builds
+  target_link_libraries(
+    triton-fastdeploy-backend
+    PRIVATE
+      CUDA::cudart
+  )
+endif() # TRITON_ENABLE_GPU
diff --git a/serving/Dockfile_cpu b/serving/Dockfile_cpu
new file mode 100644
index 000000000..390272d30
--- /dev/null
+++ b/serving/Dockfile_cpu
@@ -0,0 +1,32 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM paddlepaddle/fastdeploy:22.09-cpu-only-min
+
+ENV TZ=Asia/Shanghai \
+    DEBIAN_FRONTEND=noninteractive
+# System and Python dependencies; apt lists and the pip cache are dropped to keep the layer small.
+RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
+    && rm -rf /var/lib/apt/lists/* && python3 -m pip install --no-cache-dir -U pip \
+    && python3 -m pip install --no-cache-dir paddlepaddle faster_tokenizer
+# Install the locally built FastDeploy wheel(s), then remove the copied wheel files.
+COPY python/dist/*.whl /opt/fastdeploy/
+RUN python3 -m pip install --no-cache-dir /opt/fastdeploy/*.whl \
+    && rm -rf /opt/fastdeploy/*.whl
+
+COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
+COPY build/fastdeploy-0.0.3 /opt/fastdeploy/
+
+RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
+ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh
index f03ed7c90..261a52fec 100644
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -12,7 +12,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+WITH_GPU=${1:-ON}
 
-sh build_fd_vison.sh
-sh build_fd_runtime.sh
-sh build_fd_backend.sh
+if [ "$WITH_GPU" = "ON" ]; then
+# GPU build. '=' is the POSIX string comparison; '==' is a bashism not guaranteed under sh.
+sh build_fd_vison.sh ON
+sh build_fd_runtime.sh ON
+sh build_fd_backend.sh ON
+
+else
+# Any value other than ON selects the CPU-only build.
+sh build_fd_vison.sh OFF
+sh build_fd_runtime.sh OFF
+sh build_fd_backend.sh OFF
+
+fi
diff --git a/serving/scripts/build_fd_backend.sh b/serving/scripts/build_fd_backend.sh
index 7eb639af1..5d402c5d9 100644
--- a/serving/scripts/build_fd_backend.sh
+++ b/serving/scripts/build_fd_backend.sh
@@ -12,6 +12,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# $1 selects the build flavour: ON (default) takes the GPU branch, any other value the CPU branch.
+WITH_GPU=${1:-ON}
+# POSIX '=' with a quoted operand; '==' is a bashism not guaranteed to work when run via sh.
+if [ "$WITH_GPU" = "ON" ]; then
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
     tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \
             apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
             export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
             cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
+else  # CPU-only build, run inside the cpu-only buildbase image
+docker run -it --rm --name build_fd_backend \
+           -v "$(pwd)":/workspace/fastdeploy \
+           paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+           bash -c \
+           'cd /workspace/fastdeploy/serving;
+            rm -rf build; mkdir build; cd build;
+            apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
+            cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`'
+fi
diff --git a/serving/scripts/build_fd_runtime.sh b/serving/scripts/build_fd_runtime.sh
index 4f3df0b88..7525ca6a2 100644
--- a/serving/scripts/build_fd_runtime.sh
+++ b/serving/scripts/build_fd_runtime.sh
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+WITH_GPU=${1:-ON}
+# $1: ON (default) takes the GPU branch, anything else the CPU branch. POSIX '=' instead of the bashism '=='.
+if [ "$WITH_GPU" = "ON" ]; then
+
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
     tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \
             cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
             make -j`nproc`;
             make install'
+
+else
+# CPU-only runtime build: TensorRT backend and GPU support are switched off.
+docker run -it --rm --name build_fd_runtime \
+           -v "$(pwd)":/workspace/fastdeploy \
+           paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+           bash -c \
+           'cd /workspace/fastdeploy;
+            rm -rf build; mkdir build; cd build;
+            cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+            make -j`nproc`;
+            make install'
+
+fi
diff --git a/serving/scripts/build_fd_vison.sh b/serving/scripts/build_fd_vison.sh
index fd55d255f..e0beb6e7f 100644
--- a/serving/scripts/build_fd_vison.sh
+++ b/serving/scripts/build_fd_vison.sh
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+WITH_GPU=${1:-ON}
+# $1: ON (default) takes the GPU branch, anything else the CPU branch. POSIX '=' instead of the bashism '=='.
+if [ "$WITH_GPU" = "ON" ]; then
+
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
     tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \
             export ENABLE_TEXT=ON;
             python setup.py build;
             python setup.py bdist_wheel'
+
+else
+# CPU-only wheel build: same steps with WITH_GPU=OFF exported to setup.py.
+docker run -it --rm --name build_fd_vison \
+           -v "$(pwd)":/workspace/fastdeploy \
+           paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+           bash -c \
+           'cd /workspace/fastdeploy/python;
+            rm -rf .setuptools-cmake-build dist;
+            export WITH_GPU=OFF;
+            export ENABLE_VISION=ON;
+            export ENABLE_TEXT=ON;
+            python setup.py build;
+            python setup.py bdist_wheel'
+
+fi
diff --git a/serving/src/fastdeploy_backend_utils.h b/serving/src/fastdeploy_backend_utils.h
index 46cc516ac..64119f9fa 100644
--- a/serving/src/fastdeploy_backend_utils.h
+++ b/serving/src/fastdeploy_backend_utils.h
@@ -40,7 +40,8 @@ namespace triton {
 namespace backend {
 namespace fastdeploy_runtime {
 
-#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
+#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(                                  \
+RESPONSES, RESPONSES_COUNT, BOOL, X)                                           \
   do {                                                                         \
     TRITONSERVER_Error* raasnie_err__ = (X);                                   \
     if (raasnie_err__ != nullptr) {                                            \
diff --git a/serving/src/fastdeploy_runtime.cc b/serving/src/fastdeploy_runtime.cc
index 1051915ef..b1ed8b6b0 100644
--- a/serving/src/fastdeploy_runtime.cc
+++ b/serving/src/fastdeploy_runtime.cc
@@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
       requests, request_count, &responses, model_state_->TritonMemoryManager(),
       model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0,
       HostPolicyName().c_str());
-  RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+  FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
       responses, request_count, all_response_failed,
       SetInputTensors(total_batch_size, requests, request_count, &responses,
                       &collector, &cuda_copy));
@@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
   SET_TIMESTAMP(compute_start_ns);
 
   if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
                                       all_response_failed,
                                       Run(&responses, request_count));
   }
@@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
   SET_TIMESTAMP(compute_end_ns);
 
   if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
         responses, request_count, all_response_failed,
         ReadOutputTensors(total_batch_size, requests, request_count,
                           &responses));
@@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors(
   // BackendOutputResponder responder(
   //     requests, request_count, responses,
   //     model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0,
-  //     model_state_->EnablePinnedInput(), CudaStream());
+  //     model_state_->EnablePinnedOutput(), CudaStream());
   // r21.10
   BackendOutputResponder responder(
       requests, request_count, responses, StateForModel()->MaxBatchSize(),