Support building CPU images (#341)

Author: heliqi
Date: 2022-10-11 01:17:27 -05:00
Committed by: GitHub
Parent: ce0e3fc43d
Commit: 5328fbc861

8 changed files with 133 additions and 13 deletions


@@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17)
project(trironpaddlebackend LANGUAGES C CXX)
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by separating them with a semicolon.")
set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include"
CACHE PATH "Paths to FastDeploy includes. Multiple paths may be specified by separating them with a semicolon.")
@@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
include(FetchContent)
FetchContent_Declare(
@@ -61,6 +66,13 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)
#
# CUDA
#
if(${TRITON_ENABLE_GPU})
find_package(CUDAToolkit REQUIRED)
endif() # TRITON_ENABLE_GPU
configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY)
add_library(
@@ -73,11 +85,7 @@ target_include_directories(
triton-fastdeploy-backend
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
-)
-target_include_directories(
-  triton-fastdeploy-backend
-  PRIVATE ${FASTDEPLOY_INCLUDE_PATHS}
+  ${FASTDEPLOY_INCLUDE_PATHS}
)
target_link_libraries(
@@ -92,6 +100,13 @@ target_compile_options(
-Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)
if(${TRITON_ENABLE_GPU})
target_compile_definitions(
triton-fastdeploy-backend
PRIVATE TRITON_ENABLE_GPU=1
)
endif() # TRITON_ENABLE_GPU
set_target_properties(
triton-fastdeploy-backend PROPERTIES
POSITION_INDEPENDENT_CODE ON
@@ -107,3 +122,11 @@ target_link_libraries(
triton-backend-utils # from repo-backend
triton-core-serverstub # from repo-core
)
if(${TRITON_ENABLE_GPU})
target_link_libraries(
triton-fastdeploy-backend
PRIVATE
CUDA::cudart
)
endif() # TRITON_ENABLE_GPU
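
Taken together, these changes make GPU support a single switch: with TRITON_ENABLE_GPU=OFF, the CUDAToolkit lookup, the TRITON_ENABLE_GPU=1 compile definition, and the CUDA::cudart link dependency are all skipped. A minimal configure sketch for both modes (FASTDEPLOY_DIR path taken from the build scripts below; repo-tag and other flags omitted):

    # GPU build (default)
    cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3
    # CPU-only build
    cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3
    make -j`nproc`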

serving/Dockfile_cpu Normal file

@@ -0,0 +1,32 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM paddlepaddle/fastdeploy:22.09-cpu-only-min
ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
&& python3 -m pip install -U pip \
&& python3 -m pip install paddlepaddle faster_tokenizer
COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
&& rm -rf /opt/fastdeploy/*.whl
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy-0.0.3 /opt/fastdeploy/
RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
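
How the resulting CPU image might be built and started, as a hedged sketch (the image tag and the /models mount are placeholders; fastdeployserver is the binary renamed above):

    # from the repository root, after the wheel and backend artifacts exist
    docker build -t fastdeploy-serving:cpu -f serving/Dockfile_cpu .
    docker run -it --rm -v `pwd`/models:/models fastdeploy-serving:cpu \
        /opt/tritonserver/bin/fastdeployserver --model-repository=/models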


@@ -12,7 +12,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+WITH_GPU=${1:-ON}
-sh build_fd_vison.sh
-sh build_fd_runtime.sh
-sh build_fd_backend.sh
+if [ "$WITH_GPU" = "ON" ]; then
+    sh build_fd_vison.sh ON
+    sh build_fd_runtime.sh ON
+    sh build_fd_backend.sh ON
+else
+    sh build_fd_vison.sh OFF
+    sh build_fd_runtime.sh OFF
+    sh build_fd_backend.sh OFF
+fi
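
With no argument the wrapper keeps its previous GPU-only behavior (WITH_GPU defaults to ON); passing OFF drives all three sub-scripts in CPU mode. For example (the wrapper's own filename is not shown in this view, so the name here is a placeholder):

    bash build_all.sh        # GPU build, as before
    bash build_all.sh OFF    # CPU-only build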


@@ -12,6 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}
if [ "$WITH_GPU" = "ON" ]; then
    if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
        wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
        tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
else
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/serving;
rm -rf build; mkdir build; cd build;
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`'
fi
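
Run from the repository root, the CPU branch should leave the backend library exactly where Dockfile_cpu's COPY expects it; a quick sanity check (sketch):

    bash build_fd_backend.sh OFF
    ls serving/build/libtriton_fastdeploy.so   # copied to /opt/tritonserver/backends/fastdeploy/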


@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}
if [ "$WITH_GPU" = "ON" ]; then
    if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
        wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
        tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'
else
docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy;
rm -rf build; mkdir build; cd build;
cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'
fi
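
The CPU runtime build installs into build/fastdeploy-0.0.3, the same prefix consumed by the backend's -DFASTDEPLOY_DIR and by Dockfile_cpu's COPY; for example (again assuming invocation from the repository root):

    bash build_fd_runtime.sh OFF
    ls build/fastdeploy-0.0.3/lib   # becomes /opt/fastdeploy/lib in the image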


@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}
if [ "$WITH_GPU" = "ON" ]; then
    if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
        wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
        tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'
else
docker run -it --rm --name build_fd_vison \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/python;
rm -rf .setuptools-cmake-build dist;
export WITH_GPU=OFF;
export ENABLE_VISION=ON;
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'
fi
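
Likewise, the wheel built by the CPU branch lands in python/dist, which Dockfile_cpu copies and pip-installs; for example:

    bash build_fd_vison.sh OFF
    ls python/dist/*.whl   # consumed by COPY python/dist/*.whl /opt/fastdeploy/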


@@ -40,7 +40,8 @@ namespace triton {
namespace backend {
namespace fastdeploy_runtime {
-#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
+#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \
+    RESPONSES, RESPONSES_COUNT, BOOL, X) \
do { \
TRITONSERVER_Error* raasnie_err__ = (X); \
if (raasnie_err__ != nullptr) { \
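
The FD_ prefix is presumably needed because newer triton-inference-server/backend releases (such as the r22.09 tag used for the CPU build) ship their own RESPOND_ALL_AND_SET_TRUE_IF_ERROR macro, which would otherwise collide with this definition.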


@@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
requests, request_count, &responses, model_state_->TritonMemoryManager(),
model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0,
HostPolicyName().c_str());
-  RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+  FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
responses, request_count, all_response_failed,
SetInputTensors(total_batch_size, requests, request_count, &responses,
&collector, &cuda_copy));
@@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
SET_TIMESTAMP(compute_start_ns);
if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
all_response_failed,
Run(&responses, request_count));
}
@@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
SET_TIMESTAMP(compute_end_ns);
if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
responses, request_count, all_response_failed,
ReadOutputTensors(total_batch_size, requests, request_count,
&responses));
@@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors(
// BackendOutputResponder responder(
// requests, request_count, responses,
// model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0,
-  //     model_state_->EnablePinnedInput(), CudaStream());
+  //     model_state_->EnablePinnedOutput(), CudaStream());
// r21.10
BackendOutputResponder responder(
requests, request_count, responses, StateForModel()->MaxBatchSize(),