Add Paddle Inference as backend (#33)

* Add Paddle Inference as backend

* Update CMakeLists.txt

* Fix default backend option

* Fix some log bug and backend choosing strategy

* Add version of g++
This commit is contained in:
Jason
2022-07-22 09:53:46 +08:00
committed by GitHub
parent e248781784
commit 51ecb407d4
13 changed files with 569 additions and 78 deletions

View File

@@ -29,6 +29,7 @@ option(ENABLE_PADDLE_FRONTEND "if to enable PaddlePaddle frontend to support loa
option(WITH_GPU "if WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu" OFF)
option(ENABLE_ORT_BACKEND "if to enable onnxruntime backend." OFF)
option(ENABLE_TRT_BACKEND "if to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "if to enable paddle backend." OFF)
option(CUDA_DIRECTORY "if build tensorrt backend, need to define path of cuda library.")
option(TRT_DIRECTORY "if build tensorrt backend, need to define path of tensorrt library.")
option(ENABLE_VISION "if to enable vision models usage." OFF)
@@ -39,7 +40,7 @@ option(ENABLE_OPENCV_CUDA "if to enable opencv with cuda, this will allow proces
option(ENABLE_DEBUG "if to enable print debug information, this may reduce performance." OFF)
# Whether to build fastdeploy with vision/text/... examples, only for testing.
option(WITH_VISION_EXAMPLES "Whether to build fastdeply with vision examples" ON)
option(WITH_VISION_EXAMPLES "Whether to build fastdeply with vision examples" OFF)
if(ENABLE_DEBUG)
add_definitions(-DFASTDEPLOY_DEBUG)
@@ -63,10 +64,11 @@ endif()
add_definitions(-DFASTDEPLOY_LIB)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/*.cc)
file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/ort/*.cc)
file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/paddle/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/*_pybind.cc)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS})
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS})
set(DEPEND_LIBS "")
@@ -87,6 +89,13 @@ if(ENABLE_ORT_BACKEND)
list(APPEND DEPEND_LIBS external_onnxruntime)
endif()
if(ENABLE_PADDLE_BACKEND)
add_definitions(-DENABLE_PADDLE_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS})
include(external/paddle_inference.cmake)
list(APPEND DEPEND_LIBS external_paddle_inference external_dnnl external_omp)
endif()
if(WITH_GPU)
if(APPLE)
message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
@@ -280,3 +289,10 @@ if(BUILD_FASTDEPLOY_PYTHON)
endif()
endif(BUILD_FASTDEPLOY_PYTHON)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0")
string(STRIP "${CMAKE_CXX_COMPILER_VERSION}" CMAKE_CXX_COMPILER_VERSION)
message(WARNING "[WARNING] FastDeploy require g++ version >= 5.4.0, but now your g++ version is ${CMAKE_CXX_COMPILER_VERSION}, this may cause failure! Use -DCMAKE_CXX_COMPILER to define path of your compiler.")
endif()
endif()

View File

@@ -2,7 +2,9 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
set(WITH_GPU @WITH_GPU@)
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@)
set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
set(ENABLE_VISION @ENABLE_VISION@)
set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@)
@@ -17,16 +19,35 @@ endif()
find_library(FDLIB fastdeploy ${CMAKE_CURRENT_LIST_DIR}/lib)
list(APPEND FASTDEPLOY_LIBS ${FDLIB})
if(ENABLE_ORT_BACKEND)
find_library(ORT_LIB onnxruntime ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/onnxruntime/lib)
list(APPEND FASTDEPLOY_LIBS ${ORT_LIB})
endif()
if(ENABLE_PADDLE_BACKEND)
find_library(PADDLE_LIB paddle_inference ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/paddle/lib)
if(WIN32)
set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/mkldnn.lib")
set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.lib")
elseif(APPLE)
set(DNNL_LIB "")
set(IOMP_LIB "")
else()
set(DNNL_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mkldnn/lib/libmkldnn.so.0")
set(IOMP_LIB "${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle_inference/third_party/install/mklml/lib/libiomp5.so")
endif()
list(APPEND FASTDEPLOY_LIBS ${PADDLE_LIB} ${DNNL_LIB} ${IOMP_LIB})
endif()
if(WITH_GPU)
if (NOT CUDA_DIRECTORY)
message(FATAL_ERROR "[FastDeploy] Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/usr/local/cuda")
set(CUDA_DIRECTORY "/usr/local/cuda")
endif()
find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
if(NOT CUDA_LIB)
message(FATAL_ERROR "[FastDeploy] Cannot find library cudart in ${CUDA_DIRECTORY}, Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/path/to/cuda")
endif()
list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB})
if (ENABLE_TRT_BACKEND)
@@ -61,3 +82,28 @@ if(ENABLE_PADDLE_FRONTEND)
find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib)
list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
endif()
# Print compiler information
message(STATUS "")
message(STATUS "*************FastDeploy Building Summary**********")
message(STATUS " CMake version : ${CMAKE_VERSION}")
message(STATUS " CMake command : ${CMAKE_COMMAND}")
message(STATUS " System : ${CMAKE_SYSTEM_NAME}")
message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}")
message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
if(ENABLE_PADDLE_BACKEND)
message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
endif()
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_VISION : ${ENABLE_VISION}")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4.0")
string(STRIP "${CMAKE_CXX_COMPILER_VERSION}" CMAKE_CXX_COMPILER_VERSION)
message(WARNING "[WARNING] FastDeploy require g++ version >= 5.4.0, but now your g++ version is ${CMAKE_CXX_COMPILER_VERSION}, this may cause failure! Use -DCMAKE_CXX_COMPILER to define path of your compiler.")
endif()
endif()

107
external/paddle_inference.cmake vendored Normal file
View File

@@ -0,0 +1,107 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)

# ---------------------------------------------------------------------------
# Downloads a prebuilt Paddle Inference package via ExternalProject and
# exposes it as the imported targets:
#   external_paddle_inference / external_dnnl / external_omp
# which the top-level CMakeLists links when ENABLE_PADDLE_BACKEND is ON.
# ---------------------------------------------------------------------------
set(PADDLEINFERENCE_PROJECT "extern_paddle_inference")
set(PADDLEINFERENCE_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle_inference)
set(PADDLEINFERENCE_SOURCE_DIR
    ${THIRD_PARTY_PATH}/paddle_inference/src/${PADDLEINFERENCE_PROJECT})
set(PADDLEINFERENCE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle_inference)
set(PADDLEINFERENCE_INC_DIR
    "${PADDLEINFERENCE_INSTALL_DIR}/paddle/include"
    CACHE PATH "paddle_inference include directory." FORCE)
set(PADDLEINFERENCE_LIB_DIR
    "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/"
    CACHE PATH "paddle_inference lib directory." FORCE)

# Let binaries in the build tree locate the downloaded shared libraries.
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
    "${PADDLEINFERENCE_LIB_DIR}")

include_directories(${PADDLEINFERENCE_INC_DIR})

# Per-platform library file names. The bundled oneDNN / OpenMP runtimes are
# only shipped with the Linux package, hence the empty values elsewhere.
if(WIN32)
  set(PADDLEINFERENCE_COMPILE_LIB
      "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/paddle_inference.lib"
      CACHE FILEPATH "paddle_inference compile library." FORCE)
  set(DNNL_LIB "")
  set(OMP_LIB "")
elseif(APPLE)
  set(PADDLEINFERENCE_COMPILE_LIB
      "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.dylib"
      CACHE FILEPATH "paddle_inference compile library." FORCE)
  set(DNNL_LIB "")
  set(OMP_LIB "")
else()
  set(PADDLEINFERENCE_COMPILE_LIB
      "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so"
      CACHE FILEPATH "paddle_inference compile library." FORCE)
  set(DNNL_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mkldnn/lib/libdnnl.so.2")
  set(OMP_LIB "${PADDLEINFERENCE_INSTALL_DIR}/third_party/install/mklml/lib/libiomp5.so")
endif()  # modern bare form; the legacy endif(WIN32) repeated-argument style is deprecated

# Pick the archive matching the host platform. Unsupported platforms fail
# fast with FATAL_ERROR; the set() lines after those messages are unreachable
# placeholders kept for when packages become available.
set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
set(PADDLEINFERENCE_VERSION "2.3.1")
if(WIN32)
  message(FATAL_ERROR "Paddle Backend doesn't support Windows now.")
  set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-${PADDLEINFERENCE_VERSION}.zip")
elseif(APPLE)
  message(FATAL_ERROR "Paddle Backend doesn't support Mac OSX now.")
  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
    set(PADDLEINFERENCE_FILE "paddle_inference-osx-arm64-${PADDLEINFERENCE_VERSION}.tgz")
  else()
    set(PADDLEINFERENCE_FILE "paddle_inference-osx-x86_64-${PADDLEINFERENCE_VERSION}.tgz")
  endif()
else()
  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
    message(FATAL_ERROR "Paddle Backend doesn't support linux aarch64 now.")
    set(PADDLEINFERENCE_FILE "paddle_inference-linux-aarch64-${PADDLEINFERENCE_VERSION}.tgz")
  else()
    set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-${PADDLEINFERENCE_VERSION}.tgz")
    if(WITH_GPU)
      set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-gpu-${PADDLEINFERENCE_VERSION}.tgz")
    endif()
  endif()
endif()
set(PADDLEINFERENCE_URL "${PADDLEINFERENCE_URL_BASE}${PADDLEINFERENCE_FILE}")

# The package is prebuilt: configure/build are no-ops and "install" just moves
# the extracted tree into PADDLEINFERENCE_INSTALL_DIR with portable -E steps.
ExternalProject_Add(
  ${PADDLEINFERENCE_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  URL ${PADDLEINFERENCE_URL}
  PREFIX ${PADDLEINFERENCE_PREFIX_DIR}
  DOWNLOAD_NO_PROGRESS 1
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  UPDATE_COMMAND ""
  INSTALL_COMMAND
    ${CMAKE_COMMAND} -E remove_directory ${PADDLEINFERENCE_INSTALL_DIR} &&
    ${CMAKE_COMMAND} -E make_directory ${PADDLEINFERENCE_INSTALL_DIR} &&
    ${CMAKE_COMMAND} -E rename ${PADDLEINFERENCE_SOURCE_DIR}/paddle/
    ${PADDLEINFERENCE_INSTALL_DIR}/paddle && ${CMAKE_COMMAND} -E rename
    ${PADDLEINFERENCE_SOURCE_DIR}/third_party ${PADDLEINFERENCE_INSTALL_DIR}/third_party &&
    ${CMAKE_COMMAND} -E rename ${PADDLEINFERENCE_SOURCE_DIR}/version.txt ${PADDLEINFERENCE_INSTALL_DIR}/version.txt
  BUILD_BYPRODUCTS ${PADDLEINFERENCE_COMPILE_LIB})

# Imported targets consumed by the main build. STATIC IMPORTED here only
# records a location to link against; the files themselves may be shared.
add_library(external_paddle_inference STATIC IMPORTED GLOBAL)
set_property(TARGET external_paddle_inference PROPERTY IMPORTED_LOCATION
             ${PADDLEINFERENCE_COMPILE_LIB})
add_dependencies(external_paddle_inference ${PADDLEINFERENCE_PROJECT})

add_library(external_dnnl STATIC IMPORTED GLOBAL)
set_property(TARGET external_dnnl PROPERTY IMPORTED_LOCATION ${DNNL_LIB})
add_dependencies(external_dnnl ${PADDLEINFERENCE_PROJECT})

add_library(external_omp STATIC IMPORTED GLOBAL)
set_property(TARGET external_omp PROPERTY IMPORTED_LOCATION ${OMP_LIB})
add_dependencies(external_omp ${PADDLEINFERENCE_PROJECT})

View File

@@ -30,11 +30,17 @@ function(fastdeploy_summary)
message(STATUS "")
message(STATUS " FastDeploy version : ${FASTDEPLOY_VERSION}")
message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
if(ENABLE_ORT_BACKEND)
message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
endif()
if(ENABLE_PADDLE_BACKEND)
message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
endif()
if(WITH_GPU)
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " CUDA_DIRECTORY : ${CUDA_DIRECTORY}")
message(STATUS " TRT_DRECTORY : ${TRT_DIRECTORY}")
endif()

View File

@@ -0,0 +1,105 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/paddle/paddle_backend.h"
namespace fastdeploy {
// Translate a PaddleBackendOption into the paddle_infer::Config held in
// config_: device selection, MKLDNN settings (CPU path only) and the CPU
// math-library thread count.
void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
  if (!option.use_gpu) {
    config_.DisableGpu();
    // MKLDNN is only relevant when running on CPU.
    if (option.enable_mkldnn) {
      config_.EnableMKLDNN();
      config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
    }
  } else {
    config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
  }
  config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
}
// Loads a Paddle model (model_file + params_file), applies `option`, creates
// the underlying predictor, and caches input/output tensor metadata
// (name/shape/dtype) for later use by Infer()/GetInputInfo()/GetOutputInfo().
// Returns false if the backend is already initialized or predictor creation
// fails; returns true on success and marks the backend initialized.
bool PaddleBackend::InitFromPaddle(const std::string& model_file,
                                   const std::string& params_file,
                                   const PaddleBackendOption& option) {
  if (initialized_) {
    FDERROR << "PaddleBackend is already initlized, cannot initialize again."
            << std::endl;
    return false;
  }
  config_.SetModel(model_file, params_file);
  BuildOption(option);
  predictor_ = paddle_infer::CreatePredictor(config_);
  // Robustness fix: fail loudly instead of dereferencing a null predictor
  // in the metadata loops below.
  if (!predictor_) {
    FDERROR << "Failed to create predictor while initializing PaddleBackend."
            << std::endl;
    return false;
  }
  std::vector<std::string> input_names = predictor_->GetInputNames();
  std::vector<std::string> output_names = predictor_->GetOutputNames();
  // Record name/shape/dtype for every model input.
  for (size_t i = 0; i < input_names.size(); ++i) {
    auto handle = predictor_->GetInputHandle(input_names[i]);
    TensorInfo info;
    auto shape = handle->shape();
    info.shape.assign(shape.begin(), shape.end());
    info.dtype = PaddleDataTypeToFD(handle->type());
    info.name = input_names[i];
    inputs_desc_.emplace_back(info);
  }
  // Record name/shape/dtype for every model output.
  for (size_t i = 0; i < output_names.size(); ++i) {
    auto handle = predictor_->GetOutputHandle(output_names[i]);
    TensorInfo info;
    auto shape = handle->shape();
    info.shape.assign(shape.begin(), shape.end());
    info.dtype = PaddleDataTypeToFD(handle->type());
    info.name = output_names[i];
    outputs_desc_.emplace_back(info);
  }
  initialized_ = true;
  return true;
}
// Returns metadata for input `index`.
// Fix: also reject negative indices — the original only checked the upper
// bound, so a negative index would read out of bounds.
TensorInfo PaddleBackend::GetInputInfo(int index) {
  FDASSERT(index >= 0 && index < NumInputs(),
           "The index:" + std::to_string(index) +
               " should less than the number of inputs:" +
               std::to_string(NumInputs()) + ".");
  return inputs_desc_[index];
}
// Returns metadata for output `index`.
// Fix: also reject negative indices — the original only checked the upper
// bound, so a negative index would read out of bounds.
TensorInfo PaddleBackend::GetOutputInfo(int index) {
  FDASSERT(index >= 0 && index < NumOutputs(),
           "The index:" + std::to_string(index) +
               " should less than the number of outputs:" +
               std::to_string(NumOutputs()) + ".");
  return outputs_desc_[index];
}
bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
for (size_t i = 0; i < inputs.size(); ++i) {
auto handle = predictor_->GetInputHandle(inputs[i].name);
ShareTensorFromCpu(handle.get(), inputs[i]);
}
predictor_->Run();
outputs->resize(outputs_desc_.size());
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
CopyTensorToCpu(handle, &((*outputs)[i]));
}
return true;
}
} // namespace fastdeploy

View File

@@ -0,0 +1,77 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "paddle_inference_api.h" // NOLINT
namespace fastdeploy {
// Options consumed by PaddleBackend::BuildOption() to configure the
// underlying paddle_infer::Config.
struct PaddleBackendOption {
// Default to GPU execution only when the library was built with WITH_GPU.
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
// Enable MKLDNN acceleration; only consulted on the CPU path.
bool enable_mkldnn = true;
// Value passed to config_.SetMkldnnCacheCapacity().
int mkldnn_cache_size = 1;
// Number of CPU math-library threads (SetCpuMathLibraryNumThreads).
int cpu_thread_num = 8;
// initialize memory size(MB) for GPU
int gpu_mem_init_size = 100;
// gpu device id
int gpu_id = 0;
};
// Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor
void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
// Copy memory data from paddle_infer::Tensor to fastdeploy::FDTensor
void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor);
// Convert data type from paddle inference to fastdeploy
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype);
// Inference backend built on top of Paddle Inference
// (paddle_infer::Predictor). Only loads Paddle-format models (see
// Runtime::CreatePaddleBackend, which asserts Frontend::PADDLE).
class PaddleBackend : public BaseBackend {
public:
PaddleBackend() {}
// Applies `option` to the internal paddle_infer::Config.
void BuildOption(const PaddleBackendOption& option);
// Loads model/params files, creates the predictor and caches tensor
// metadata; returns false if already initialized.
bool InitFromPaddle(
const std::string& model_file, const std::string& params_file,
const PaddleBackendOption& option = PaddleBackendOption());
// Runs inference: inputs are shared with the predictor (zero-copy),
// outputs are copied back into `outputs`.
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
// Counts reflect the metadata cached during InitFromPaddle().
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
private:
paddle_infer::Config config_;
std::shared_ptr<paddle_infer::Predictor> predictor_;
// Input/output name/shape/dtype captured at initialization time.
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
};
} // namespace fastdeploy

View File

@@ -0,0 +1,72 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/paddle/paddle_backend.h"
namespace fastdeploy {
// Hands the FDTensor's CPU buffer to a paddle_infer::Tensor without copying.
// Supported dtypes: FP32 / INT32 / INT64; anything else trips the FDASSERT.
void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor) {
  std::vector<int> dims(fd_tensor.shape.begin(), fd_tensor.shape.end());
  switch (fd_tensor.dtype) {
    case FDDataType::FP32:
      tensor->ShareExternalData(static_cast<const float*>(fd_tensor.Data()),
                                dims, paddle_infer::PlaceType::kCPU);
      return;
    case FDDataType::INT32:
      tensor->ShareExternalData(static_cast<const int32_t*>(fd_tensor.Data()),
                                dims, paddle_infer::PlaceType::kCPU);
      return;
    case FDDataType::INT64:
      tensor->ShareExternalData(static_cast<const int64_t*>(fd_tensor.Data()),
                                dims, paddle_infer::PlaceType::kCPU);
      return;
    default:
      break;
  }
  FDASSERT(false, "Unexpected data type(" + Str(fd_tensor.dtype) +
                      ") while infer with PaddleBackend.");
}
// Allocates `fd_tensor` to match the paddle_infer::Tensor's shape/dtype/name,
// then copies the data to host memory. Supported dtypes: FP32/INT32/INT64.
void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
                     FDTensor* fd_tensor) {
  auto fd_dtype = PaddleDataTypeToFD(tensor->type());
  fd_tensor->Allocate(tensor->shape(), fd_dtype, tensor->name());
  switch (fd_tensor->dtype) {
    case FDDataType::FP32:
      tensor->CopyToCpu(static_cast<float*>(fd_tensor->MutableData()));
      return;
    case FDDataType::INT32:
      tensor->CopyToCpu(static_cast<int32_t*>(fd_tensor->MutableData()));
      return;
    case FDDataType::INT64:
      tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor->MutableData()));
      return;
    default:
      break;
  }
  FDASSERT(false, "Unexpected data type(" + Str(fd_tensor->dtype) +
                      ") while infer with PaddleBackend.");
}
// Maps a paddle_infer::DataType to the corresponding FDDataType.
// Unknown types assert; the FP32 fallback mirrors the original default.
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype) {
  switch (dtype) {
    case paddle_infer::FLOAT32:
      return FDDataType::FP32;
    case paddle_infer::INT64:
      return FDDataType::INT64;
    case paddle_infer::INT32:
      return FDDataType::INT32;
    case paddle_infer::UINT8:
      return FDDataType::UINT8;
    default:
      FDASSERT(false, "Unexpected data type:" + std::to_string(int(dtype)) +
                          " while call CopyTensorToCpu in PaddleBackend.");
  }
  return FDDataType::FP32;
}
} // namespace fastdeploy

View File

@@ -29,6 +29,10 @@
#cmakedefine ENABLE_ORT_BACKEND
#endif
#ifndef ENABLE_PADDLE_BACKEND
#cmakedefine ENABLE_PADDLE_BACKEND
#endif
#ifndef WITH_GPU
#cmakedefine WITH_GPU
#endif

View File

@@ -36,12 +36,21 @@ bool FastDeployModel::InitRuntime() {
} else if (runtime_option.backend == Backend::TRT) {
if (!IsBackendAvailable(Backend::TRT)) {
FDERROR
<< "Backend:TRT is not complied with current FastDeploy library."
<< "Backend::TRT is not complied with current FastDeploy library."
<< std::endl;
return false;
}
} else if (runtime_option.backend == Backend::PDINFER) {
if (!IsBackendAvailable(Backend::PDINFER)) {
FDERROR << "Backend::PDINFER is not compiled with current FastDeploy "
"library."
<< std::endl;
return false;
}
} else {
FDERROR << "Only support Backend::ORT / Backend::TRT now." << std::endl;
FDERROR
<< "Only support Backend::ORT / Backend::TRT / Backend::PDINFER now."
<< std::endl;
return false;
}
runtime_ = new Runtime();
@@ -74,29 +83,19 @@ bool FastDeployModel::CreateCpuBackend() {
return false;
}
for (auto& b : valid_cpu_backends) {
if (b == Backend::ORT) {
if (!IsBackendAvailable(Backend::ORT)) {
FDERROR << "OrtBackend is not complied with current FastDeploy library."
<< std::endl;
continue;
}
runtime_option.backend = b;
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
} else {
FDERROR << "Only Backend::ORT as cpu backend is supported now."
<< std::endl;
for (size_t i = 0; i < valid_cpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_cpu_backends[i])) {
continue;
}
runtime_option.backend = valid_cpu_backends[i];
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Cannot find an available cpu backend to load this model."
<< std::endl;
FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
return false;
}
@@ -107,40 +106,18 @@ bool FastDeployModel::CreateGpuBackend() {
return false;
}
for (auto& b : valid_gpu_backends) {
if (b == Backend::ORT) {
if (!IsBackendAvailable(Backend::ORT)) {
FDERROR << "OrtBackend is not complied with current FastDeploy library."
<< std::endl;
continue;
}
runtime_option.backend = b;
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
} else if (b == Backend::TRT) {
if (!IsBackendAvailable(Backend::TRT)) {
FDERROR << "TrtBackend is not complied with current FastDeploy library."
<< std::endl;
continue;
}
runtime_option.backend = b;
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
return true;
} else {
FDERROR << "Only Backend::ORT / Backend::TRT as gpu backends are "
"supported now."
<< std::endl;
for (size_t i = 0; i < valid_gpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_gpu_backends[i])) {
continue;
}
runtime_option.backend = valid_gpu_backends[i];
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Cannot find an available gpu backend to load this model."
<< std::endl;
return false;
@@ -164,4 +141,4 @@ void FastDeployModel::EnableDebug() {
bool FastDeployModel::DebugEnabled() { return debug_; }
} // namespace fastdeploy
} // namespace fastdeploy

View File

@@ -22,6 +22,10 @@
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif
#ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
@@ -31,6 +35,9 @@ std::vector<Backend> GetAvailableBackends() {
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
return backends;
}
@@ -45,6 +52,26 @@ bool IsBackendAvailable(const Backend& backend) {
return false;
}
// Human-readable name for a Backend enumerator (used in logs/errors).
std::string Str(const Backend& b) {
  switch (b) {
    case Backend::ORT:
      return "Backend::ORT";
    case Backend::TRT:
      return "Backend::TRT";
    case Backend::PDINFER:
      return "Backend::PDINFER";
    default:
      return "UNKNOWN-Backend";
  }
}
// Human-readable name for a Frontend enumerator (used in logs/errors).
std::string Str(const Frontend& f) {
  switch (f) {
    case Frontend::PADDLE:
      return "Frontend::PADDLE";
    case Frontend::ONNX:
      return "Frontend::ONNX";
    default:
      return "UNKNOWN-Frontend";
  }
}
bool ModelFormatCheck(const std::string& model_file,
const Frontend& model_format) {
if (model_format == Frontend::PADDLE) {
@@ -74,14 +101,33 @@ bool ModelFormatCheck(const std::string& model_file,
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
if (option.backend == Backend::UNKNOWN) {
if (IsBackendAvailable(Backend::ORT)) {
option.backend = Backend::ORT;
} else if (IsBackendAvailable(Backend::PDINFER)) {
option.backend = Backend::PDINFER;
} else {
FDERROR << "Please define backend in RuntimeOption, current it's "
"Backend::UNKNOWN."
<< std::endl;
return false;
}
}
if (option.backend == Backend::ORT) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU, "Backend::TRT only supports Device::CPU/Device::GPU.");
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
CreateOrtBackend();
} else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU, "Backend::TRT only supports Device::GPU.");
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend();
} else if (option.backend == Backend::PDINFER) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
CreatePaddleBackend();
} else {
FDERROR << "Runtime only support Backend::ORT/Backend::TRT as backend now."
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."
<< std::endl;
return false;
}
@@ -101,6 +147,27 @@ bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
return backend_->Infer(input_tensors, output_tensors);
}
// Instantiates the Paddle Inference backend from the current RuntimeOption.
// Only Frontend::PADDLE models are accepted. When the library was built
// without ENABLE_PADDLE_BACKEND the call aborts with a clear message.
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  auto pd_option = PaddleBackendOption();
  pd_option.enable_mkldnn = option.pd_enable_mkldnn;
  pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
  pd_option.use_gpu = (option.device == Device::GPU) ? true : false;
  pd_option.gpu_id = option.device_id;
  FDASSERT(option.model_format == Frontend::PADDLE,
           "PaddleBackend only support model format of Frontend::PADDLE.");
  backend_ = new PaddleBackend();
  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_);
  FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
                                          pd_option),
           "Load model from Paddle failed while initliazing PaddleBackend.");
#else
  // Fix: the message previously referenced OrtBackend/ENABLE_ORT_BACKEND,
  // which would mislead users compiling without the Paddle backend.
  FDASSERT(false,
           "PaddleBackend is not available, please compiled with "
           "ENABLE_PADDLE_BACKEND=ON.");
#endif
}
void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption();
@@ -125,8 +192,9 @@ void Runtime::CreateOrtBackend() {
"Load model from Paddle failed while initliazing OrtBackend.");
}
#else
FDASSERT(false, "OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON.");
FDASSERT(false,
"OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON.");
#endif
}
@@ -158,8 +226,9 @@ void Runtime::CreateTrtBackend() {
"Load model from Paddle failed while initliazing TrtBackend.");
}
#else
FDASSERT(false, "TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON.");
FDASSERT(false,
"TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON.");
#endif
}
} // namespace fastdeploy
} // namespace fastdeploy

View File

@@ -23,6 +23,8 @@ namespace fastdeploy {
enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER };
enum FASTDEPLOY_DECL Frontend { PADDLE, ONNX };
FASTDEPLOY_DECL std::string Str(const Backend& b);
FASTDEPLOY_DECL std::string Str(const Frontend& f);
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
@@ -31,7 +33,7 @@ bool ModelFormatCheck(const std::string& model_file,
const Frontend& model_format);
struct FASTDEPLOY_DECL RuntimeOption {
Backend backend = Backend::ORT;
Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess
int cpu_thread_num = 8;
@@ -52,6 +54,10 @@ struct FASTDEPLOY_DECL RuntimeOption {
// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
int ort_execution_mode = -1;
// ======Only for Paddle Backend=====
bool pd_enable_mkldnn = true;
int pd_mkldnn_cache_size = 1;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
std::map<std::string, std::vector<int32_t>> trt_max_shape;
@@ -79,6 +85,8 @@ struct FASTDEPLOY_DECL Runtime {
void CreateOrtBackend();
void CreatePaddleBackend();
void CreateTrtBackend();
int NumInputs() { return backend_->NumInputs(); }

View File

@@ -11,8 +11,8 @@ PPYOLOE::PPYOLOE(const std::string& model_file, const std::string& params_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
config_file_ = config_file;
valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER};
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT};
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
@@ -22,12 +22,12 @@ PPYOLOE::PPYOLOE(const std::string& model_file, const std::string& params_file,
bool PPYOLOE::Initialize() {
if (!BuildPreprocessPipelineFromConfig()) {
std::cout << "Failed to build preprocess pipeline from configuration file."
FDERROR << "Failed to build preprocess pipeline from configuration file."
<< std::endl;
return false;
}
if (!InitRuntime()) {
std::cout << "Failed to initialize fastdeploy backend." << std::endl;
FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
return false;
}
return true;
@@ -39,13 +39,13 @@ bool PPYOLOE::BuildPreprocessPipelineFromConfig() {
try {
cfg = YAML::LoadFile(config_file_);
} catch (YAML::BadFile& e) {
std::cout << "Failed to load yaml file " << config_file_
FDERROR << "Failed to load yaml file " << config_file_
<< ", maybe you should check this file." << std::endl;
return false;
}
if (cfg["arch"].as<std::string>() != "YOLO") {
std::cout << "Require the arch of model is YOLO, but arch defined in "
FDERROR << "Require the arch of model is YOLO, but arch defined in "
"config file is "
<< cfg["arch"].as<std::string>() << "." << std::endl;
return false;
@@ -76,7 +76,7 @@ bool PPYOLOE::BuildPreprocessPipelineFromConfig() {
} else if (op_name == "Permute") {
processors_.push_back(std::make_shared<HWC2CHW>());
} else {
std::cout << "Unexcepted preprocess operator: " << op_name << "."
FDERROR << "Unexcepted preprocess operator: " << op_name << "."
<< std::endl;
return false;
}
@@ -89,7 +89,7 @@ bool PPYOLOE::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
int origin_h = mat->Height();
for (size_t i = 0; i < processors_.size(); ++i) {
if (!(*(processors_[i].get()))(mat)) {
std::cout << "Failed to process image data in " << processors_[i]->Name()
FDERROR << "Failed to process image data in " << processors_[i]->Name()
<< "." << std::endl;
return false;
}

View File

@@ -23,7 +23,11 @@ int main() {
std::string img_path = "000000014439_640x640.jpg";
std::string vis_path = "vis.jpeg";
auto model = vis::ppdet::PPYOLOE(model_file, params_file, config_file);
auto option = fastdeploy::RuntimeOption();
option.device = fastdeploy::Device::CPU;
option.backend = fastdeploy::Backend::PDINFER;
auto model =
vis::ppdet::PPYOLOE(model_file, params_file, config_file, option);
if (!model.Initialized()) {
std::cerr << "Init Failed." << std::endl;
return -1;