From f3d44785c45d3b4c53e281df7085767c5acbf3a5 Mon Sep 17 00:00:00 2001
From: Jason
Date: Thu, 20 Apr 2023 21:12:43 +0800
Subject: [PATCH] Remove Paddle Reader (#1813)

* Remove Paddle Reader

* support pp-infer c++14

* disable trt cache

---------

Co-authored-by: wang-xinyu
---
 CMakeLists.txt                                |  2 +-
 cmake/paddle_inference.cmake                  |  1 +
 fastdeploy/runtime/backends/paddle/option.h   |  2 +
 .../runtime/backends/paddle/option_pybind.cc  |  2 +
 .../runtime/backends/paddle/paddle_backend.cc | 71 +++++++++++++------
 scripts/patch_paddle_inference.py             |  1 +
 serving/src/fastdeploy_runtime.cc             |  4 +-
 7 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 332f382da..9ed16259e 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,7 +36,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
 
 # Set C++11 as standard for the whole project
 if(NOT MSVC)
-  set(CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_STANDARD 14)
   set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
   if(NEED_ABI0)
     add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake
index 1aad4dfda..1e9e90081 100755
--- a/cmake/paddle_inference.cmake
+++ b/cmake/paddle_inference.cmake
@@ -43,6 +43,7 @@ if(PADDLEINFERENCE_DIRECTORY)
 endif()
 
 include_directories(${PADDLEINFERENCE_INC_DIR})
+include_directories(${PADDLEINFERENCE_DIRECTORY})
 if(WIN32)
   set(PADDLEINFERENCE_COMPILE_LIB
       "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/paddle_inference.lib"
diff --git a/fastdeploy/runtime/backends/paddle/option.h b/fastdeploy/runtime/backends/paddle/option.h
index 134fc7ef0..80380e0e2 100755
--- a/fastdeploy/runtime/backends/paddle/option.h
+++ b/fastdeploy/runtime/backends/paddle/option.h
@@ -58,6 +58,8 @@ struct PaddleBackendOption {
   bool enable_memory_optimize = true;
   /// Whether enable ir debug, default false
   bool switch_ir_debug = false;
+  /// Whether the loaded model is a quantized model
+  bool is_quantize_model = false;
 
   /*
    * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
diff --git a/fastdeploy/runtime/backends/paddle/option_pybind.cc b/fastdeploy/runtime/backends/paddle/option_pybind.cc
index a08c9ab9c..3590cb866 100755
--- a/fastdeploy/runtime/backends/paddle/option_pybind.cc
+++ b/fastdeploy/runtime/backends/paddle/option_pybind.cc
@@ -51,6 +51,8 @@ void BindPaddleOption(pybind11::module& m) {
                      &PaddleBackendOption::mkldnn_cache_size)
       .def_readwrite("gpu_mem_init_size",
                      &PaddleBackendOption::gpu_mem_init_size)
+      .def_readwrite("is_quantize_model",
+                     &PaddleBackendOption::is_quantize_model)
       .def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps)
       .def("delete_pass", &PaddleBackendOption::DeletePass)
      .def("set_ipu_config", &PaddleBackendOption::SetIpuConfig);
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index 99e2ab117..f8ee9b31c 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -148,11 +148,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
     FDASSERT(ReadBinaryFromFile(model, &model_content),
              "Failed to read file %s.", model.c_str());
   }
-  auto reader =
-      paddle2onnx::PaddleReader(model_content.c_str(), model_content.size());
   // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to
   // int8 mode
-  if (reader.is_quantize_model) {
+  if (option.is_quantize_model) {
     if (option.device == Device::GPU) {
       FDWARNING << "The loaded model is a quantized model, while inference on "
                    "GPU, please use TensorRT backend to get better performance."
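
Note: with paddle2onnx::PaddleReader removed, quantized models are no longer
auto-detected; callers must flag them through the new option before the
backend initializes. A minimal caller-side sketch, using only names this
patch introduces (the wrapper function is illustrative):

    // Quantized models must now be flagged explicitly before backend init;
    // previously reader.is_quantize_model supplied this automatically.
    #include "fastdeploy/runtime/backends/paddle/option.h"

    fastdeploy::PaddleBackendOption MakeQuantizedCpuOption() {
      fastdeploy::PaddleBackendOption option;
      option.device = fastdeploy::Device::CPU;  // CPU + MKLDNN int8 path
      option.is_quantize_model = true;
      return option;
    }
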
inference on " "GPU, please use TensorRT backend to get better performance." @@ -184,25 +182,25 @@ bool PaddleBackend::InitFromPaddle(const std::string& model, } } - inputs_desc_.resize(reader.num_inputs); - for (int i = 0; i < reader.num_inputs; ++i) { - std::string name(reader.inputs[i].name); - std::vector shape(reader.inputs[i].shape, - reader.inputs[i].shape + reader.inputs[i].rank); - inputs_desc_[i].name = name; - inputs_desc_[i].shape.assign(shape.begin(), shape.end()); - inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype); - } - outputs_desc_.resize(reader.num_outputs); - for (int i = 0; i < reader.num_outputs; ++i) { - std::string name(reader.outputs[i].name); - std::vector shape( - reader.outputs[i].shape, - reader.outputs[i].shape + reader.outputs[i].rank); - outputs_desc_[i].name = name; - outputs_desc_[i].shape.assign(shape.begin(), shape.end()); - outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype); - } +// inputs_desc_.resize(reader.num_inputs); +// for (int i = 0; i < reader.num_inputs; ++i) { +// std::string name(reader.inputs[i].name); +// std::vector shape(reader.inputs[i].shape, +// reader.inputs[i].shape + reader.inputs[i].rank); +// inputs_desc_[i].name = name; +// inputs_desc_[i].shape.assign(shape.begin(), shape.end()); +// inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype); +// } +// outputs_desc_.resize(reader.num_outputs); +// for (int i = 0; i < reader.num_outputs; ++i) { +// std::string name(reader.outputs[i].name); +// std::vector shape( +// reader.outputs[i].shape, +// reader.outputs[i].shape + reader.outputs[i].rank); +// outputs_desc_[i].name = name; +// outputs_desc_[i].shape.assign(shape.begin(), shape.end()); +// outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype); +// } if (option.collect_trt_shape) { // Set the shape info file. 
@@ -253,6 +251,35 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
     }
   }
   predictor_ = paddle_infer::CreatePredictor(config_);
+
+  auto input_names = predictor_->GetInputNames();
+  auto output_names = predictor_->GetOutputNames();
+  auto input_dtypes = predictor_->GetInputTypes();
+  auto output_dtypes = predictor_->GetOutputTypes();
+  auto input_shapes = predictor_->GetInputTensorShape();
+  auto output_shapes = predictor_->GetOutputTensorShape();
+
+  inputs_desc_.resize(input_names.size());
+  for (int i = 0; i < input_names.size(); ++i) {
+    inputs_desc_[i].name = input_names[i];
+    auto iter = input_shapes.find(inputs_desc_[i].name);
+    FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.", inputs_desc_[i].name.c_str());
+    inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
+    auto iter1 = input_dtypes.find(inputs_desc_[i].name);
+    FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.", inputs_desc_[i].name.c_str());
+    inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
+  }
+  outputs_desc_.resize(output_names.size());
+  for (int i = 0; i < output_names.size(); ++i) {
+    outputs_desc_[i].name = output_names[i];
+    auto iter = output_shapes.find(outputs_desc_[i].name);
+    FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.", outputs_desc_[i].name.c_str());
+    outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
+    auto iter1 = output_dtypes.find(outputs_desc_[i].name);
+    FDASSERT(iter1 != output_dtypes.end(), "Cannot find data type for output %s.", outputs_desc_[i].name.c_str());
+    outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
+  }
+  initialized_ = true;
   return true;
 }
 
diff --git a/scripts/patch_paddle_inference.py b/scripts/patch_paddle_inference.py
index e85071ffd..9eeabf37f 100644
--- a/scripts/patch_paddle_inference.py
+++ b/scripts/patch_paddle_inference.py
@@ -26,6 +26,7 @@ def process_paddle_inference(paddle_inference_so_file):
     rpaths = [
         "$ORIGIN", "$ORIGIN/../../third_party/install/mkldnn/lib/",
         "$ORIGIN/../../third_party/install/mklml/lib/",
+        "$ORIGIN/../../third_party/install/fdmodel/lib/",
         "$ORIGIN/../../../tensorrt/lib/"
     ]
 
diff --git a/serving/src/fastdeploy_runtime.cc b/serving/src/fastdeploy_runtime.cc
index 03536f58b..bc9c89252 100644
--- a/serving/src/fastdeploy_runtime.cc
+++ b/serving/src/fastdeploy_runtime.cc
@@ -314,7 +314,9 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
         //     value_string,
         //     &runtime_options_->trt_max_workspace_size));
       } else if (param_key == "cache_file") {
-        runtime_options_->trt_option.serialize_file = value_string;
+        LOG_MESSAGE(TRITONSERVER_LOG_INFO,
+                    "Skipping setting TRT cache file.");
+        // runtime_options_->trt_option.serialize_file = value_string;
       } else if (param_key == "use_paddle") {
         runtime_options_->EnablePaddleToTrt();
       } else if (param_key == "use_paddle_log") {
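
Since the I/O descriptions are now collected right after CreatePredictor(),
they are available as soon as runtime initialization succeeds. A hedged
end-to-end usage sketch; the RuntimeOption/Runtime calls and the
paddle_infer_option member are assumptions based on FastDeploy's public
headers, not part of this patch:

    #include <iostream>
    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.SetModelPath("model.pdmodel", "model.pdiparams");
      option.UsePaddleInferBackend();
      // Quantized models must be flagged manually now (see the option.h hunk).
      option.paddle_infer_option.is_quantize_model = false;

      fastdeploy::Runtime runtime;
      if (!runtime.Init(option)) return -1;
      // I/O info now comes from the predictor, not paddle2onnx::PaddleReader.
      for (int i = 0; i < runtime.NumInputs(); ++i) {
        std::cout << runtime.GetInputInfo(i).name << std::endl;
      }
      return 0;
    }
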