[Other] Deprecate some option api and parameters (#1243)

* Optimize Poros backend * fix error * Add more pybind * fix conflicts * add some deprecate notices * [Other] Deprecate some apis in RuntimeOption (#1240) * Deprecate more options * modify serving * Update option.h * fix tensorrt error * Update option_pybind.cc * Update option_pybind.cc * Fix error in serving * fix word spell error
2025-10-06 17:17:14 +08:00 · 2023-02-07 17:57:46 +08:00
parent a18cc0f94c
commit 713afe7f1c
15 changed files with 380 additions and 229 deletions
--- a/fastdeploy/runtime/backends/tensorrt/option.h
+++ b/fastdeploy/runtime/backends/tensorrt/option.h
@@ -21,23 +21,64 @@

 namespace fastdeploy {

+/*! @brief Option object to configure TensorRT backend
+ */
 struct TrtBackendOption {
-  std::string model_file = "";   // Path of model file
-  std::string params_file = "";  // Path of parameters file, can be empty
-
-  // format of input model
-  ModelFormat model_format = ModelFormat::AUTOREC;
-
-  int gpu_id = 0;
-  bool enable_fp16 = false;
-  bool enable_int8 = false;
+  /// `max_batch_size`, it's deprecated in TensorRT 8.x
  size_t max_batch_size = 32;
+
+  /// `max_workspace_size` for TensorRT
  size_t max_workspace_size = 1 << 30;
+
+  /**
+   * @brief Enable half precision inference; on devices that do not support half precision, it will fall back to float32 mode
+   */
+  bool enable_fp16 = false;
+
+  /** \brief Set the shape range of an input tensor for a model that contains dynamic input shapes while using the TensorRT backend
+   *
+   * \param[in] tensor_name The name of the model input that has a dynamic shape
+   * \param[in] min The minimal shape for the input tensor
+   * \param[in] opt The optimized shape for the input tensor, just set the most common shape; if it is empty, `min` is used instead
+   * \param[in] max The maximum shape for the input tensor; if it is empty, `min` is used instead
+   */
+  void SetShape(const std::string& tensor_name,
+                const std::vector<int32_t>& min,
+                const std::vector<int32_t>& opt,
+                const std::vector<int32_t>& max) {
+    min_shape[tensor_name].clear();
+    max_shape[tensor_name].clear();
+    opt_shape[tensor_name].clear();
+    min_shape[tensor_name].assign(min.begin(), min.end());
+    if (opt.size() == 0) {  // no optimized shape given: reuse `min`
+      opt_shape[tensor_name].assign(min.begin(), min.end());
+    } else {
+      opt_shape[tensor_name].assign(opt.begin(), opt.end());
+    }
+    if (max.size() == 0) {  // no maximum shape given: reuse `min`
+      max_shape[tensor_name].assign(min.begin(), min.end());
+    } else {
+      max_shape[tensor_name].assign(max.begin(), max.end());
+    }
+  }
+  /**
+   * @brief Set the cache file path while using the TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT takes a long time; with this set, the TensorRT engine is saved to the path in `serialize_file` and loaded directly when the code is executed again
+   */
+  std::string serialize_file = "";
+
+  // The parameters below may be removed in the next version, please do not
+  // access or use them directly
  std::map<std::string, std::vector<int32_t>> max_shape;
  std::map<std::string, std::vector<int32_t>> min_shape;
  std::map<std::string, std::vector<int32_t>> opt_shape;
-  std::string serialize_file = "";
  bool enable_pinned_memory = false;
  void* external_stream_ = nullptr;
+  int gpu_id = 0;
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+  // format of input model
+  ModelFormat model_format = ModelFormat::AUTOREC;
 };
+
+
 }  // namespace fastdeploy