From dccb737d8d8159995b86c5ff47d675c7a301920d Mon Sep 17 00:00:00 2001 From: Jack Zhou Date: Thu, 20 Oct 2022 17:02:56 +0800 Subject: [PATCH] [Backend] Add collect shape for pp-trt backend (#372) * Add collect_shape attr * add EnableTunedTensorRtDynamicShape * Add collect shape python api * Fix quant model not set trt dynamic shape * Add shape info print * Fix shape print * Use CopyFromCpu instead of ShareExternalData * Add ENABLE_TRT_BACKEND macro * Add shared data with --- fastdeploy/backends/paddle/paddle_backend.cc | 138 ++++++++++++++++--- fastdeploy/backends/paddle/paddle_backend.h | 10 ++ fastdeploy/backends/paddle/util.cc | 18 ++- fastdeploy/pybind/runtime.cc | 2 + fastdeploy/runtime.cc | 9 ++ fastdeploy/runtime.h | 12 ++ fastdeploy/utils/path.h | 74 ++++++++++ python/fastdeploy/runtime.py | 6 + scripts/patch_paddle_inference.py | 2 +- 9 files changed, 251 insertions(+), 20 deletions(-) create mode 100644 fastdeploy/utils/path.h diff --git a/fastdeploy/backends/paddle/paddle_backend.cc b/fastdeploy/backends/paddle/paddle_backend.cc index ff2e76fb0..674a37954 100644 --- a/fastdeploy/backends/paddle/paddle_backend.cc +++ b/fastdeploy/backends/paddle/paddle_backend.cc @@ -13,6 +13,8 @@ // limitations under the License. 
#include "fastdeploy/backends/paddle/paddle_backend.h" +#include "fastdeploy/utils/path.h" +#include namespace fastdeploy { @@ -31,21 +33,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { use_static = true; } config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, precision, use_static); - std::map> max_shape; - std::map> min_shape; - std::map> opt_shape; - for (const auto& item : option.trt_option.min_shape) { - auto max_iter = option.trt_option.max_shape.find(item.first); - auto opt_iter = option.trt_option.opt_shape.find(item.first); - FDASSERT(max_iter != option.trt_option.max_shape.end(), "Cannot find %s in TrtBackendOption::min_shape.", item.first.c_str()); - FDASSERT(opt_iter != option.trt_option.opt_shape.end(), "Cannot find %s in TrtBackendOption::opt_shape.", item.first.c_str()); - max_shape[item.first].assign(max_iter->second.begin(), max_iter->second.end()); - opt_shape[item.first].assign(opt_iter->second.begin(), opt_iter->second.end()); - min_shape[item.first].assign(item.second.begin(), item.second.end()); - } - if (min_shape.size() > 0) { - config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape); - } + SetTRTDynamicShapeToConfig(option); #else FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so will fallback to GPU with Paddle Inference Backend." << std::endl; #endif @@ -97,6 +85,17 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, if (reader.is_quantize_model) { if (option.use_gpu) { FDWARNING << "The loaded model is a quantized model, while inference on GPU, please use TensorRT backend to get better performance." 
<< std::endl; + if (option.enable_trt) { +#ifdef ENABLE_TRT_BACKEND + bool use_static = false; + if (option.trt_option.serialize_file != "") { + FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl; + use_static = true; + } + config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, paddle_infer::PrecisionType::kInt8, use_static, false); + SetTRTDynamicShapeToConfig(option); +#endif + } } if (option.enable_mkldnn) { config_.EnableMkldnnInt8(); @@ -123,7 +122,31 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, outputs_desc_[i].shape.assign(shape.begin(), shape.end()); outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype); } - +#ifdef ENABLE_TRT_BACKEND + if (option.collect_shape) { + // Set the shape info file. + auto curr_model_dir = GetDirFromPath(model_file); + std::string shape_range_info = PathJoin(curr_model_dir, "shape_range_info.pbtxt"); + if (!CheckFileExists(shape_range_info)) { + FDINFO << "Start generating shape range info file." << std::endl; + paddle_infer::Config analysis_config; + analysis_config.SetModel(model_file, params_file); + analysis_config.CollectShapeRangeInfo(shape_range_info); + auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config); + std::map> max_shape; + std::map> min_shape; + std::map> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + // Need to run once to get the shape range info file. + CollectShapeRun(predictor_tmp.get(), max_shape); + CollectShapeRun(predictor_tmp.get(), min_shape); + CollectShapeRun(predictor_tmp.get(), opt_shape); + FDINFO << "Finish generating shape range info file." << std::endl; + } + FDINFO << "Start loading shape range info file "<< shape_range_info << " to set TensorRT dynamic shape." 
<< std::endl; + config_.EnableTunedTensorRtDynamicShape(shape_range_info, false); + } +#endif predictor_ = paddle_infer::CreatePredictor(config_); initialized_ = true; return true; @@ -172,4 +195,87 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs, return true; } +#ifdef ENABLE_TRT_BACKEND +void PaddleBackend::SetTRTDynamicShapeToConfig(const PaddleBackendOption& option) { + std::map<std::string, std::vector<int>> max_shape; + std::map<std::string, std::vector<int>> min_shape; + std::map<std::string, std::vector<int>> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + FDINFO << "Start setting trt dynamic shape." << std::endl; + if (min_shape.size() > 0) { + config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape); + } + FDINFO << "Finish setting trt dynamic shape." << std::endl; +} + +void PaddleBackend::GetDynamicShapeFromOption(const PaddleBackendOption& option, + std::map<std::string, std::vector<int>>* max_shape, + std::map<std::string, std::vector<int>>* min_shape, + std::map<std::string, std::vector<int>>* opt_shape) const { + auto print_shape = [](const std::vector<int>& shape) -> std::string { + std::ostringstream oss; + oss << "["; + for (int i = 0; i < shape.size(); ++i) { + oss << shape[i]; + if (i < shape.size() - 1) { + oss << ", "; + } + } + oss << "]"; + return oss.str(); + }; + for (const auto& item : option.trt_option.min_shape) { + auto max_iter = option.trt_option.max_shape.find(item.first); + auto opt_iter = option.trt_option.opt_shape.find(item.first); + FDASSERT(max_iter != option.trt_option.max_shape.end(), "Cannot find %s in TrtBackendOption::max_shape.", item.first.c_str()); + FDASSERT(opt_iter != option.trt_option.opt_shape.end(), "Cannot find %s in TrtBackendOption::opt_shape.", item.first.c_str()); + (*max_shape)[item.first].assign(max_iter->second.begin(), max_iter->second.end()); + (*opt_shape)[item.first].assign(opt_iter->second.begin(), opt_iter->second.end()); + (*min_shape)[item.first].assign(item.second.begin(), item.second.end()); + FDINFO << item.first << ": the max shape = " << print_shape(max_iter->second) + << ", the min shape = " << print_shape(item.second) + << ", 
the opt shape = " << print_shape(opt_iter->second) << std::endl; + } +} + +void PaddleBackend::CollectShapeRun(paddle_infer::Predictor* predictor, + const std::map<std::string, std::vector<int>>& shape) const { + auto input_names = predictor->GetInputNames(); + auto input_type = predictor->GetInputTypes(); + for(auto name : input_names) { + FDASSERT(shape.find(name) != shape.end() && input_type.find(name) != input_type.end(), + "Paddle Input name [%s] is not one of the trt dynamic shape.", name.c_str()); + auto tensor = predictor->GetInputHandle(name); + auto shape_value = shape.at(name); + int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1, + std::multiplies<int>()); + tensor->Reshape(shape_value); + auto dtype = input_type[name]; + switch (dtype) { + case paddle_infer::DataType::FLOAT32: { + std::vector<float> input_data(shape_num, 1.0); + tensor->CopyFromCpu(input_data.data()); + break; + } + case paddle_infer::DataType::INT32: { + std::vector<int> input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + break; + } + case paddle_infer::DataType::INT64: { + std::vector<int64_t> input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + break; + } + default: { + FDASSERT(false, "Input data Paddle backend only supports FP32/INT32/INT64 currently."); + break; + } + } + } + predictor->Run(); +} +#endif + + } // namespace fastdeploy diff --git a/fastdeploy/backends/paddle/paddle_backend.h b/fastdeploy/backends/paddle/paddle_backend.h index 0d59a8a33..78b939fea 100755 --- a/fastdeploy/backends/paddle/paddle_backend.h +++ b/fastdeploy/backends/paddle/paddle_backend.h @@ -44,6 +44,7 @@ struct PaddleBackendOption { bool enable_trt = false; #ifdef ENABLE_TRT_BACKEND TrtBackendOption trt_option; + bool collect_shape = false; #endif int mkldnn_cache_size = 1; @@ -95,6 +96,15 @@ class PaddleBackend : public BaseBackend { std::vector<TensorInfo> GetOutputInfos() override; private: +#ifdef ENABLE_TRT_BACKEND + void CollectShapeRun(paddle_infer::Predictor* predictor, + const std::map<std::string, std::vector<int>>& shape) const; 
+ void GetDynamicShapeFromOption(const PaddleBackendOption& option, + std::map>* max_shape, + std::map>* min_shape, + std::map>* opt_shape) const; + void SetTRTDynamicShapeToConfig(const PaddleBackendOption& option); +#endif paddle_infer::Config config_; std::shared_ptr predictor_; std::vector inputs_desc_; diff --git a/fastdeploy/backends/paddle/util.cc b/fastdeploy/backends/paddle/util.cc index 498561791..216c788b2 100644 --- a/fastdeploy/backends/paddle/util.cc +++ b/fastdeploy/backends/paddle/util.cc @@ -29,16 +29,28 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, tensor->Reshape(shape); auto place = ConvertFDDeviceToPlace(fd_tensor.device); if (fd_tensor.dtype == FDDataType::FP32) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } return; } else if (fd_tensor.dtype == FDDataType::INT32) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } return; } else if (fd_tensor.dtype == FDDataType::INT64) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } return; } FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index c2e2df19e..6d8eb7804 100755 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -44,6 +44,8 @@ void BindRuntime(pybind11::module& m) { .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16) .def("disable_trt_fp16", 
&RuntimeOption::DisableTrtFP16) .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile) + .def("enable_paddle_trt_collect_shape", &RuntimeOption::EnablePaddleTrtCollectShape) + .def("disable_paddle_trt_collect_shape", &RuntimeOption::DisablePaddleTrtCollectShape) .def_readwrite("model_file", &RuntimeOption::model_file) .def_readwrite("params_file", &RuntimeOption::params_file) .def_readwrite("model_format", &RuntimeOption::model_format) diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc index 63ad57eb3..0877402d7 100755 --- a/fastdeploy/runtime.cc +++ b/fastdeploy/runtime.cc @@ -388,6 +388,14 @@ bool Runtime::Compile(std::vector>& prewarm_tensors, "ENABLE_POROS_BACKEND=ON."); #endif return true; +} + +void RuntimeOption::EnablePaddleTrtCollectShape() { + pd_collect_shape = true; +} + +void RuntimeOption::DisablePaddleTrtCollectShape() { + pd_collect_shape = false; } bool Runtime::Init(const RuntimeOption& _option) { @@ -498,6 +506,7 @@ void Runtime::CreatePaddleBackend() { #ifdef ENABLE_TRT_BACKEND if (pd_option.use_gpu && option.pd_enable_trt) { pd_option.enable_trt = true; + pd_option.collect_shape = option.pd_collect_shape; auto trt_option = TrtBackendOption(); trt_option.gpu_id = option.device_id; trt_option.enable_fp16 = option.trt_enable_fp16; diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h index 80979218c..32ad1615c 100755 --- a/fastdeploy/runtime.h +++ b/fastdeploy/runtime.h @@ -204,6 +204,17 @@ struct FASTDEPLOY_DECL RuntimeOption { */ void SetTrtCacheFile(const std::string& cache_file_path); + + /** + * @brief Enable to collect shape in paddle trt backend + */ + void EnablePaddleTrtCollectShape(); + + /** + * @brief Disable to collect shape in paddle trt backend + */ + void DisablePaddleTrtCollectShape(); + Backend backend = Backend::UNKNOWN; // for cpu inference and preprocess // default will let the backend choose their own default value @@ -225,6 +236,7 @@ struct FASTDEPLOY_DECL RuntimeOption { bool pd_enable_mkldnn = true; bool 
pd_enable_log_info = false; bool pd_enable_trt = false; + bool pd_collect_shape = false; int pd_mkldnn_cache_size = 1; std::vector pd_delete_pass_names; diff --git a/fastdeploy/utils/path.h b/fastdeploy/utils/path.h new file mode 100644 index 000000000..5c9ed7087 --- /dev/null +++ b/fastdeploy/utils/path.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#ifdef _MSC_VER +#define PATH_SEP "\\" +#else +#define PATH_SEP "/" +#endif + +namespace fastdeploy { + +inline std::string PathJoin(const std::vector& paths, + const std::string& sep = PATH_SEP) { + if (paths.size() == 1) { + return paths[0]; + } + std::string filepath = ""; + for (const auto& path : paths) { + if (filepath == "") { + filepath += path; + continue; + } + if (path[0] == sep[0] || filepath.back() == sep[0]) { + filepath += path; + } else { + filepath += sep + path; + } + } + return filepath; +} + +inline std::string PathJoin(const std::string& folder, + const std::string& filename, + const std::string& sep = PATH_SEP) { + return PathJoin(std::vector{folder, filename}, sep); +} + +inline std::string GetDirFromPath(const std::string& path) { + auto pos = path.find_last_of(PATH_SEP); + if (pos == std::string::npos) { + return ""; + } + // The root path in UNIX systems + if (pos == 0) { + return "/"; + } + return path.substr(0, pos); +} + +inline bool 
CheckFileExists(const std::string& path) { + std::fstream fin(path, std::ios::in); + if (!fin) { + return false; + } + return true; +} + +} // namespace fastdeploy diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index c576369ee..90e64d400 100755 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -329,6 +329,12 @@ class RuntimeOption: """ return self._option.set_trt_max_workspace_size(trt_max_workspace_size) + def enable_paddle_trt_collect_shape(self): + return self._option.enable_paddle_trt_collect_shape() + + def disable_paddle_trt_collect_shape(self): + return self._option.disable_paddle_trt_collect_shape() + def __repr__(self): attrs = dir(self._option) message = "RuntimeOption(\n" diff --git a/scripts/patch_paddle_inference.py b/scripts/patch_paddle_inference.py index 2ee0dee9b..f46ab2491 100644 --- a/scripts/patch_paddle_inference.py +++ b/scripts/patch_paddle_inference.py @@ -26,7 +26,7 @@ def process_paddle_inference(paddle_inference_so_file): rpaths = [ "$ORIGIN", "$ORIGIN/../../third_party/install/mkldnn/lib/", "$ORIGIN/../../third_party/install/mklml/lib/", - "$ORIGIN/../../../tensorrt/lib" + "$ORIGIN/../../../tensorrt/lib/" ] patchelf_exe = os.getenv("PATCHELF_EXE", "patchelf")