Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
support build cpu images (#341)
@@ -28,6 +28,7 @@ cmake_minimum_required(VERSION 3.17)
 project(trironpaddlebackend LANGUAGES C CXX)
 
+option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
 set(FASTDEPLOY_DIR "" CACHE PATH "Paths to FastDeploy Directory. Multiple paths may be specified by sparating them with a semicolon.")
 set(FASTDEPLOY_INCLUDE_PATHS "${FASTDEPLOY_DIR}/include"
     CACHE PATH "Paths to FastDeploy includes. Multiple paths may be specified by sparating them with a semicolon.")
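The new TRITON_ENABLE_GPU option defaults to ON, so a CPU-only build has to switch it off at configure time. A minimal configure sketch, assuming a FastDeploy install tree at build/fastdeploy-0.0.3 (these are the same flags the CPU branch of build_fd_backend.sh passes later in this commit):

    # Hedged sketch: configure the Triton backend without CUDA support.
    cmake .. \
        -DTRITON_ENABLE_GPU=OFF \
        -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 \
        -DTRITON_COMMON_REPO_TAG=r22.09 \
        -DTRITON_CORE_REPO_TAG=r22.09 \
        -DTRITON_BACKEND_REPO_TAG=r22.09
    make -j`nproc`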
@@ -39,6 +40,10 @@ set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/
 set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
 set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
 
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
 include(FetchContent)
 
 FetchContent_Declare(
@@ -61,6 +66,13 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(repo-common repo-core repo-backend)
 
+#
+# CUDA
+#
+if(${TRITON_ENABLE_GPU})
+  find_package(CUDAToolkit REQUIRED)
+endif()  # TRITON_ENABLE_GPU
+
 configure_file(src/libtriton_fastdeploy.ldscript libtriton_fastdeploy.ldscript COPYONLY)
 
 add_library(
@@ -73,11 +85,7 @@ target_include_directories(
   triton-fastdeploy-backend
   PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/src
-)
-
-target_include_directories(
-  triton-fastdeploy-backend
-  PRIVATE ${FASTDEPLOY_INCLUDE_PATHS}
+    ${FASTDEPLOY_INCLUDE_PATHS}
 )
 
 target_link_libraries(
@@ -92,6 +100,13 @@ target_compile_options(
     -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
 )
 
+if(${TRITON_ENABLE_GPU})
+  target_compile_definitions(
+    triton-fastdeploy-backend
+    PRIVATE TRITON_ENABLE_GPU=1
+  )
+endif()  # TRITON_ENABLE_GPU
+
 set_target_properties(
   triton-fastdeploy-backend PROPERTIES
     POSITION_INDEPENDENT_CODE ON
@@ -107,3 +122,11 @@ target_link_libraries(
     triton-backend-utils    # from repo-backend
     triton-core-serverstub  # from repo-core
 )
+
+if(${TRITON_ENABLE_GPU})
+  target_link_libraries(
+    triton-fastdeploy-backend
+    PRIVATE
+      CUDA::cudart
+  )
+endif()  # TRITON_ENABLE_GPU
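With CUDA::cudart linked only under TRITON_ENABLE_GPU, a CPU build of the backend should carry no CUDA runtime dependency. A hedged way to verify, assuming the library was built under serving/build as the scripts below do:

    # A CPU-only libtriton_fastdeploy.so should print nothing here.
    ldd serving/build/libtriton_fastdeploy.so | grep -i cudart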
serving/Dockfile_cpu (new file, 32 lines)
@@ -0,0 +1,32 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM paddlepaddle/fastdeploy:22.09-cpu-only-min
+
+ENV TZ=Asia/Shanghai \
+    DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
+    && python3 -m pip install -U pip \
+    && python3 -m pip install paddlepaddle faster_tokenizer
+
+COPY python/dist/*.whl /opt/fastdeploy/
+RUN python3 -m pip install /opt/fastdeploy/*.whl \
+    && rm -rf /opt/fastdeploy/*.whl
+
+COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
+COPY build/fastdeploy-0.0.3 /opt/fastdeploy/
+
+RUN mv /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver
+ENV LD_LIBRARY_PATH="/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH"
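A hedged sketch of building this image; the tag is illustrative, and the build context must be the repository root so the COPY paths (python/dist, serving/build, build/fastdeploy-0.0.3) resolve:

    # Illustrative tag; run from the repository root after the build scripts.
    docker build -f serving/Dockfile_cpu -t fastdeploy-serving:cpu .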
@@ -12,7 +12,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-sh build_fd_vison.sh
-sh build_fd_runtime.sh
-sh build_fd_backend.sh
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
+    sh build_fd_vison.sh ON
+    sh build_fd_runtime.sh ON
+    sh build_fd_backend.sh ON
+
+else
+
+    sh build_fd_vison.sh OFF
+    sh build_fd_runtime.sh OFF
+    sh build_fd_backend.sh OFF
+
+fi
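The dispatcher script (its file name is not shown in this hunk) now takes WITH_GPU as its first positional argument, defaulting to ON, and forwards it to the three sub-scripts. Usage sketch with a placeholder script name:

    sh build_all.sh        # hypothetical name; GPU build, the default
    sh build_all.sh OFF    # CPU-only build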
@@ -12,6 +12,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
+
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
     tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -27,3 +32,13 @@ docker run -it --rm --name build_fd_backend \
     apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
     export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
     cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
+else
+docker run -it --rm --name build_fd_backend \
+    -v`pwd`:/workspace/fastdeploy \
+    paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+    bash -c \
+    'cd /workspace/fastdeploy/serving;
+    rm -rf build; mkdir build; cd build;
+    apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
+    cmake .. -DTRITON_ENABLE_GPU=OFF -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r22.09 -DTRITON_CORE_REPO_TAG=r22.09 -DTRITON_BACKEND_REPO_TAG=r22.09; make -j`nproc`'
+fi
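In the CPU branch, build_fd_backend.sh skips the CMake 3.18.6 download, builds inside the 22.09-cpu-only-buildbase image, and configures with -DTRITON_ENABLE_GPU=OFF and the r22.09 Triton tags (the GPU branch stays on r21.10). A hedged post-build check for the artifact the Dockerfile copies:

    ls -lh serving/build/libtriton_fastdeploy.so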
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
     tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -37,3 +41,17 @@ docker run -it --rm --name build_fd_runtime \
     cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
     make -j`nproc`;
     make install'
+
+else
+
+docker run -it --rm --name build_fd_runtime \
+    -v`pwd`:/workspace/fastdeploy \
+    paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+    bash -c \
+    'cd /workspace/fastdeploy;
+    rm -rf build; mkdir build; cd build;
+    cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+    make -j`nproc`;
+    make install'
+
+fi
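The CPU branch of build_fd_runtime.sh drops -DENABLE_TRT_BACKEND and -DTRT_DIRECTORY, sets -DWITH_GPU=OFF, and installs into build/fastdeploy-0.0.3, the tree that both build_fd_backend.sh and the Dockerfile reference. A hedged check after this step:

    ls build/fastdeploy-0.0.3/lib build/fastdeploy-0.0.3/third_libs/install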
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+WITH_GPU=${1:-ON}
+
+if [ $WITH_GPU == "ON" ]; then
 if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
     wget https://github.com/Kitware/CMake/releases/download/v3.18.6/cmake-3.18.6-Linux-x86_64.tar.gz
    tar -zxvf cmake-3.18.6-Linux-x86_64.tar.gz
@@ -34,3 +38,19 @@ docker run -it --rm --name build_fd_vison \
     export ENABLE_TEXT=ON;
     python setup.py build;
     python setup.py bdist_wheel'
+
+else
+
+docker run -it --rm --name build_fd_vison \
+    -v`pwd`:/workspace/fastdeploy \
+    paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
+    bash -c \
+    'cd /workspace/fastdeploy/python;
+    rm -rf .setuptools-cmake-build dist;
+    export WITH_GPU=OFF;
+    export ENABLE_VISION=ON;
+    export ENABLE_TEXT=ON;
+    python setup.py build;
+    python setup.py bdist_wheel'
+
+fi
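The CPU branch of build_fd_vison.sh exports WITH_GPU=OFF before building the Python wheel; the wheel lands in python/dist, which the Dockerfile installs via COPY python/dist/*.whl. A hedged check:

    ls python/dist/*.whl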
@@ -40,7 +40,8 @@ namespace triton {
 namespace backend {
 namespace fastdeploy_runtime {
 
-#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
+#define FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR( \
+    RESPONSES, RESPONSES_COUNT, BOOL, X) \
   do { \
     TRITONSERVER_Error* raasnie_err__ = (X); \
     if (raasnie_err__ != nullptr) { \
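The macro gains an FD_ prefix, presumably to avoid clashing with an identically named macro in the newer Triton backend headers that the r22.09 tags pull in; that motivation is my inference, not stated in the commit. A hedged way to look for such a clash, assuming FetchContent's default _deps layout under the build directory:

    grep -rn "RESPOND_ALL_AND_SET_TRUE_IF_ERROR" serving/build/_deps/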
@@ -918,7 +918,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
       requests, request_count, &responses, model_state_->TritonMemoryManager(),
       model_state_->EnablePinnedInput(), CudaStream(), nullptr, nullptr, 0,
       HostPolicyName().c_str());
-  RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+  FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
       responses, request_count, all_response_failed,
       SetInputTensors(total_batch_size, requests, request_count, &responses,
                       &collector, &cuda_copy));
@@ -934,7 +934,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
   SET_TIMESTAMP(compute_start_ns);
 
   if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(responses, request_count,
                                       all_response_failed,
                                       Run(&responses, request_count));
   }
@@ -943,7 +943,7 @@ void ModelInstanceState::ProcessRequests(TRITONBACKEND_Request** requests,
   SET_TIMESTAMP(compute_end_ns);
 
   if (!all_response_failed) {
-    RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
+    FD_RESPOND_ALL_AND_SET_TRUE_IF_ERROR(
         responses, request_count, all_response_failed,
         ReadOutputTensors(total_batch_size, requests, request_count,
                           &responses));
@@ -1096,7 +1096,7 @@ TRITONSERVER_Error* ModelInstanceState::ReadOutputTensors(
   // BackendOutputResponder responder(
   //     requests, request_count, responses,
   //     model_state_->TritonMemoryManager(), model_state_->MaxBatchSize() > 0,
-  //     model_state_->EnablePinnedInput(), CudaStream());
+  //     model_state_->EnablePinnedOutput(), CudaStream());
   // r21.10
   BackendOutputResponder responder(
       requests, request_count, responses, StateForModel()->MaxBatchSize(),