Support building CPU images (#341)

Author: heliqi
Date: 2022-10-11 01:17:27 -05:00
Committed by: GitHub
Parent: ce0e3fc43d
Commit: 5328fbc861

8 changed files with 133 additions and 13 deletions


@@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17)
project(trironpaddlebackend LANGUAGES C CXX)
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by separating them with a semicolon.")
set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include"
CACHE PATH "Paths to FastDeploy includes. Multiple paths may be specified by separating them with a semicolon.")
@@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
include(FetchContent)
FetchContent_Declare(
@@ -61,6 +66,13 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)
#
# CUDA
#
if(${TRITON_ENABLE_GPU})
find_package(CUDAToolkit REQUIRED)
endif() # TRITON_ENABLE_GPU
configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY)
add_library(
@@ -73,11 +85,7 @@ target_include_directories(
triton-fastdeploy-backend
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
-)
-target_include_directories(
-  triton-fastdeploy-backend
-  PRIVATE ${FASTDEPLOY_INCLUDE_PATHS}
+  ${FASTDEPLOY_INCLUDE_PATHS}
)
target_link_libraries(
@@ -92,6 +100,13 @@ target_compile_options(
-Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)
if(${TRITON_ENABLE_GPU})
target_compile_definitions(
triton-fastdeploy-backend
PRIVATE TRITON_ENABLE_GPU=1
)
endif() # TRITON_ENABLE_GPU
set_target_properties(
triton-fastdeploy-backend PROPERTIES
POSITION_INDEPENDENT_CODE ON
@@ -107,3 +122,11 @@ target_link_libraries(
triton-backend-utils # from repo-backend
triton-core-serverstub # from repo-core
)
if(${TRITON_ENABLE_GPU})
target_link_libraries(
triton-fastdeploy-backend
PRIVATE
CUDA::cudart
)
endif() # TRITON_ENABLE_GPU
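
Taken together, these changes make GPU support a single switch: with TRITON_ENABLE_GPU=OFF, the CUDAToolkit lookup, the TRITON_ENABLE_GPU=1 compile definition, and the CUDA::cudart link dependency are all skipped. A minimal configure sketch for both modes (FASTDEPLOY_DIR path taken from the build scripts below; repo-tag and other flags omitted):

    # GPU build (default)
    cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3
    # CPU-only build
    cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3
    make -j`nproc`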

serving/Dockfile_cpu Normal file

@@ -0,0 +1,32 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM paddlepaddle/fastdeploy:22.09-cpu-only-min
ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
&& python3 -m pip install -U pip \
&& python3 -m pip install paddlepaddle faster_tokenizer
COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
&& rm -rf /opt/fastdeploy/*.whl
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy-0.0.3 /opt/fastdeploy/
RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
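
How the resulting CPU image might be built and started, as a hedged sketch (the image tag and the /models mount are placeholders; fastdeployserver is the binary renamed above):

    # from the repository root, after the wheel and backend artifacts exist
    docker build -t fastdeploy-serving:cpu -f serving/Dockfile_cpu .
    docker run -it --rm -v `pwd`/models:/models fastdeploy-serving:cpu \
        /opt/tritonserver/bin/fastdeployserver --model-repository=/models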


@@ -12,7 +12,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+WITH_GPU=${1:-ON}
-sh build_fd_vison.sh
-sh build_fd_runtime.sh
-sh build_fd_backend.sh
+if [ "$WITH_GPU" = "ON" ]; then
+    sh build_fd_vison.sh ON
+    sh build_fd_runtime.sh ON
+    sh build_fd_backend.sh ON
+else
+    sh build_fd_vison.sh OFF
+    sh build_fd_runtime.sh OFF
+    sh build_fd_backend.sh OFF
+fi
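
With no argument the wrapper keeps its previous GPU-only behavior (WITH_GPU defaults to ON); passing OFF drives all three sub-scripts in CPU mode. For example (the wrapper's own filename is not shown in this view, so the name here is a placeholder):

    bash build_all.sh        # GPU build, as before
    bash build_all.sh OFF    # CPU-only build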


@@ -12,6 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}
if [ "$WITH_GPU" = "ON" ]; then
    if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
        wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
        tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
else
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/serving;
rm -rf build; mkdir build; cd build;
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`'
fi
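
Run from the repository root, the CPU branch should leave the backend library exactly where Dockfile_cpu's COPY expects it; a quick sanity check (sketch):

    bash build_fd_backend.sh OFF
    ls serving/build/libtriton_fastdeploy.so   # copied to /opt/tritonserver/backends/fastdeploy/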


@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}
if [ "$WITH_GPU" = "ON" ]; then
    if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
        wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
        tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'
else
docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy;
rm -rf build; mkdir build; cd build;
cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'
fi
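
The CPU runtime build installs into build/fastdeploy-0.0.3, the same prefix consumed by the backend's -DFASTDEPLOY_DIR and by Dockfile_cpu's COPY; for example (again assuming invocation from the repository root):

    bash build_fd_runtime.sh OFF
    ls build/fastdeploy-0.0.3/lib   # becomes /opt/fastdeploy/lib in the image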


@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}
if [ "$WITH_GPU" = "ON" ]; then
    if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
        wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
        tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'
else
docker run -it --rm --name build_fd_vison \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/python;
rm -rf .setuptools-cmake-build dist;
export WITH_GPU=OFF;
export ENABLE_VISION=ON;
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'
fi
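
Likewise, the wheel built by the CPU branch lands in python/dist, which Dockfile_cpu copies and pip-installs; for example:

    bash build_fd_vison.sh OFF
    ls python/dist/*.whl   # consumed by COPY python/dist/*.whl /opt/fastdeploy/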


@@ -40,7 +40,8 @@ namespace triton {
namespace backend {
namespace fastdeploy_runtime {
-#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
+#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \
+    RESPONSES, RESPONSES_COUNT, BOOL, X) \
do { \
TRITONSERVER_Error* raasnie_err__ = (X); \
if (raasnie_err__ != nullptr) { \
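
The FD_ prefix is presumably needed because newer triton-inference-server/backend releases (such as the r22.09 tag used for the CPU build) ship their own RESPOND_ALL_AND_SET_TRUE_IF_ERROR macro, which would otherwise collide with this definition.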


@@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
requests, request_count, &responses, model_state_->TritonMemoryManager(),
model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0,
HostPolicyName().c_str());
-  RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+  FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
responses, request_count, all_response_failed,
SetInputTensors(total_batch_size, requests, request_count, &responses,
&collector, &cuda_copy));
@@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
SET_TIMESTAMP(compute_start_ns);
if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
all_response_failed,
Run(&responses, request_count));
}
@@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
SET_TIMESTAMP(compute_end_ns);
if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
responses, request_count, all_response_failed,
ReadOutputTensors(total_batch_size, requests, request_count,
&responses));
@@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors(
// BackendOutputResponder responder(
// requests, request_count, responses,
// model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0,
-  //     model_state_->EnablePinnedInput(), CudaStream());
+  //     model_state_->EnablePinnedOutput(), CudaStream());
// r21.10
BackendOutputResponder responder(
requests, request_count, responses, StateForModel()->MaxBatchSize(),