mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
support build cpu images (#341)
@@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17)

project(trironpaddlebackend LANGUAGES C CXX)

option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by separating them with a semicolon.")
set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include"
    CACHE PATH "Paths to FastDeploy includes. Multiple paths may be specified by separating them with a semicolon.")
@@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

include(FetchContent)

FetchContent_Declare(
@@ -61,6 +66,13 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)

#
# CUDA
#
if(${TRITON_ENABLE_GPU})
  find_package(CUDAToolkit REQUIRED)
endif() # TRITON_ENABLE_GPU

configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY)

add_library(
@@ -73,11 +85,7 @@ target_include_directories(
  triton-fastdeploy-backend
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_include_directories(
  triton-fastdeploy-backend
  PRIVATE ${FASTDEPLOY_INCLUDE_PATHS}
    ${FASTDEPLOY_INCLUDE_PATHS}
)

target_link_libraries(
@@ -92,6 +100,13 @@ target_compile_options(
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

if(${TRITON_ENABLE_GPU})
  target_compile_definitions(
    triton-fastdeploy-backend
    PRIVATE TRITON_ENABLE_GPU=1
  )
endif() # TRITON_ENABLE_GPU

set_target_properties(
  triton-fastdeploy-backend PROPERTIES
    POSITION_INDEPENDENT_CODE ON
@@ -107,3 +122,11 @@ target_link_libraries(
    triton-backend-utils    # from repo-backend
    triton-core-serverstub  # from repo-core
)

if(${TRITON_ENABLE_GPU})
  target_link_libraries(
    triton-fastdeploy-backend
    PRIVATE
      CUDA::cudart
  )
endif() # TRITON_ENABLE_GPU
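Taken together, the CMakeLists changes make CUDA optional: with TRITON_ENABLE_GPU=OFF the build skips find_package(CUDAToolkit), the TRITON_ENABLE_GPU=1 compile definition, and the CUDA::cudart link. A minimal configure sketch for the CPU-only path, assuming an already-installed FastDeploy runtime tree and the same flags the in-repo script passes (see build_fd_backend.sh below); the paths are illustrative:

    # CPU-only configure of the Triton backend (sketch; run inside serving/).
    # rapidjson-dev must be installed first, as the build script does via apt-get.
    mkdir build && cd build
    cmake .. \
      -DTRITON_ENABLE_GPU=OFF \
      -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 \
      -DTRITON_COMMON_REPO_TAG=r22.09 \
      -DTRITON_CORE_REPO_TAG=r22.09 \
      -DTRITON_BACKEND_REPO_TAG=r22.09
    make -j"$(nproc)"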
serving/Dockfile_cpu (new file)
@@ -0,0 +1,32 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM paddlepaddle/fastdeploy:22.09-cpu-only-min

ENV TZ=Asia/Shanghai \
    DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
    && python3 -m pip install -U pip \
    && python3 -m pip install paddlepaddle faster_tokenizer

COPY python/dist/*.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
    && rm -rf /opt/fastdeploy/*.whl

COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy-0.0.3 /opt/fastdeploy/

RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
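Every COPY in this Dockerfile is relative to the repository root, so the build context must be the repo root, and the wheel, backend library, and runtime tree must already exist. A hedged build sketch (the image tag is illustrative, not from the commit):

    # build the CPU serving image from the repository root
    docker build -t fastdeploy-serving:cpu -f serving/Dockfile_cpu .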
@@ -12,7 +12,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}

sh build_fd_vison.sh
sh build_fd_runtime.sh
sh build_fd_backend.sh
if [ $WITH_GPU == "ON" ]; then

sh build_fd_vison.sh ON
sh build_fd_runtime.sh ON
sh build_fd_backend.sh ON

else

sh build_fd_vison.sh OFF
sh build_fd_runtime.sh OFF
sh build_fd_backend.sh OFF

fi
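The dispatcher and the three per-component scripts all default their first positional argument to ON via bash parameter expansion, so existing GPU invocations keep working unchanged. A standalone sketch of the ${1:-ON} idiom (the function name is illustrative):

    # demo of the ${1:-ON} default used by the build scripts
    build() {
      WITH_GPU=${1:-ON}   # first argument, or "ON" when none is given
      echo "building with WITH_GPU=$WITH_GPU"
    }
    build       # -> building with WITH_GPU=ON
    build OFF   # -> building with WITH_GPU=OFF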
@@ -12,6 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

WITH_GPU=${1:-ON}

if [ $WITH_GPU == "ON" ]; then

if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
else
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/serving;
rm -rf build; mkdir build; cd build;
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`'
fi
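In the CPU branch the container configures with -DTRITON_ENABLE_GPU=OFF and the newer r22.09 repo tags, while the GPU branch stays on r21.10. Either way the library is built in serving/build inside the mounted workspace, which is the exact path the CPU Dockerfile copies from; a quick hedged sanity check from the repo root:

    # confirm the artifact the Dockerfile's COPY expects
    ls -lh serving/build/libtriton_fastdeploy.so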
@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}

if [ $WITH_GPU == "ON" ]; then

if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'

else

docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy;
rm -rf build; mkdir build; cd build;
cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'

fi
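The CPU configure drops only the GPU-specific flags (ENABLE_TRT_BACKEND, WITH_GPU, TRT_DIRECTORY); the Paddle Inference, ONNX Runtime, and OpenVINO backends are still built, which is why the image's LD_LIBRARY_PATH lists their third_libs install directories. A hedged check that the installed tree matches what the image expects:

    # verify the third-party runtimes referenced by LD_LIBRARY_PATH exist
    for d in onnxruntime paddle2onnx paddle_inference openvino; do
      ls -d "build/fastdeploy-0.0.3/third_libs/install/$d" || echo "missing: $d"
    done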
@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WITH_GPU=${1:-ON}

if [ $WITH_GPU == "ON" ]; then

if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'

else

docker run -it --rm --name build_fd_vison \
-v`pwd`:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/python;
rm -rf .setuptools-cmake-build dist;
export WITH_GPU=OFF;
export ENABLE_VISION=ON;
export ENABLE_TEXT=ON;
python setup.py build;
python setup.py bdist_wheel'

fi
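With all three scripts accepting OFF, a CPU-only image can be produced end to end. A hedged sketch, assuming the scripts are invoked from the repository root (their docker runs mount `pwd` as /workspace/fastdeploy) and an illustrative image tag:

    sh build_fd_vison.sh OFF     # Python wheel -> python/dist/*.whl
    sh build_fd_runtime.sh OFF   # runtime tree -> build/fastdeploy-0.0.3
    sh build_fd_backend.sh OFF   # backend .so  -> serving/build/libtriton_fastdeploy.so
    docker build -t fastdeploy-serving:cpu -f serving/Dockfile_cpu .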
The remaining hunks rename the backend's response macro with an FD_ prefix (presumably to avoid a clash with the identically named macro in Triton's backend utilities) and update its call sites.

@@ -40,7 +40,8 @@ namespace triton {
namespace backend {
namespace fastdeploy_runtime {

#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \
    RESPONSES, RESPONSES_COUNT, BOOL, X) \
  do { \
    TRITONSERVER_Error* raasnie_err__ = (X); \
    if (raasnie_err__ != nullptr) { \
@@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
      requests, request_count, &responses, model_state_->TritonMemoryManager(),
      model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0,
      HostPolicyName().c_str());
  RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
  FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
      responses, request_count, all_response_failed,
      SetInputTensors(total_batch_size, requests, request_count, &responses,
                      &collector, &cuda_copy));
@@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
  SET_TIMESTAMP(compute_start_ns);

  if (!all_response_failed) {
    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
                                         all_response_failed,
                                         Run(&responses, request_count));
  }
@@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
  SET_TIMESTAMP(compute_end_ns);

  if (!all_response_failed) {
    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
        responses, request_count, all_response_failed,
        ReadOutputTensors(total_batch_size, requests, request_count,
                          &responses));
@@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors(
  // BackendOutputResponder responder(
  //     requests, request_count, responses,
  //     model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0,
  //     model_state_->EnablePinnedInput(), CudaStream());
  //     model_state_->EnablePinnedOutput(), CudaStream());
  // r21.10
  BackendOutputResponder responder(
      requests, request_count, responses, StateForModel()->MaxBatchSize(),