Polish CMake files and runtime APIs (#36)

* Add custom operator for onnxruntime and fix paddle backend

* Polish CMake files and runtime APIs

* Remove copy libraries

* Fix some issues

* Fix bug

* Fix bug
Jason
2022-07-25 08:59:53 +08:00
committed by GitHub
parent 4b681581b1
commit 279c993483
20 changed files with 446 additions and 193 deletions

@@ -36,8 +36,58 @@ bool CheckModelFormat(const std::string& model_file,
Frontend GuessModelFormat(const std::string& model_file);
struct FASTDEPLOY_DECL RuntimeOption {
Backend backend = Backend::UNKNOWN;
// set the path of the model file and the params file
// for an ONNX model, only model_file is required, but model_format
// must still be set accordingly
// model_format currently supports 'paddle' / 'onnx'
void SetModelPath(const std::string& model_path,
const std::string& params_path = "",
const std::string& _model_format = "paddle");
// set model inference to run on CPU
void UseCpu();
// set model inference to run on GPU with the given device id
void UseGpu(int gpu_id = 0);
// set the number of threads used for inference on CPU
void SetCpuThreadNum(int thread_num);
// use the Paddle Inference backend
void UsePaddleBackend();
// use the ONNX Runtime backend
void UseOrtBackend();
// use the TensorRT backend
void UseTrtBackend();
// enable MKLDNN acceleration while using the Paddle Inference backend on CPU
void EnablePaddleMKLDNN();
// disable MKLDNN acceleration while using the Paddle Inference backend on CPU
void DisablePaddleMKLDNN();
// set the shape-cache size when MKLDNN is enabled with the Paddle Inference backend
void SetPaddleMKLDNNCacheSize(int size);
// set the TensorRT input shape when the model has dynamic-shape inputs
// min_shape: the minimum input shape
// opt_shape: the most common shape during inference, empty by default
// max_shape: the maximum input shape, empty by default
// if opt_shape and max_shape are left empty, they fall back to min_shape,
// which fixes the input shape to min_shape during inference
void SetTrtInputShape(
const std::string& input_name, const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
const std::vector<int32_t>& max_shape = std::vector<int32_t>());
// enable half precision (FP16) inference with the TensorRT backend
void EnableTrtFP16();
// disable half precision, falling back to full precision (float32)
void DisableTrtFP16();
Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess
int cpu_thread_num = 8;
int device_id = 0;
@@ -62,7 +112,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
int pd_mkldnn_cache_size = 1;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
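
A minimal usage sketch of the RuntimeOption APIs declared in this diff. The header path, the fastdeploy::Runtime class with its Init() method, and the model/input names are assumptions made for illustration at this revision, not confirmed by the commit itself:

#include "fastdeploy/fastdeploy_runtime.h"  // assumed header path at this revision

int main() {
  fastdeploy::RuntimeOption option;
  // Paddle format needs both the model file and the params file;
  // an ONNX model would pass only the model file plus "onnx" as the format.
  option.SetModelPath("model.pdmodel", "model.pdiparams", "paddle");
  option.UseGpu(0);        // run inference on GPU 0 (UseCpu() for CPU)
  option.UseTrtBackend();  // select the TensorRT backend
  // "x" is a placeholder input name; with only min_shape given,
  // opt_shape/max_shape fall back to it and the shape stays fixed.
  option.SetTrtInputShape("x", {1, 3, 224, 224});
  option.EnableTrtFP16();  // optional half-precision inference

  fastdeploy::Runtime runtime;  // Runtime and Init() assumed from this repo
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}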