[Other] Optimize paddle backend (#1265)

* Optimize paddle backend * optimize paddle backend * add version support
2025-10-05 08:37:06 +08:00 · 2023-02-08 19:12:03 +08:00
parent 60ba4b06c1
commit a4b0565b9a
10 changed files with 265 additions and 174 deletions
--- a/fastdeploy/runtime/backends/paddle/option.h
+++ b/fastdeploy/runtime/backends/paddle/option.h
@@ -24,54 +24,71 @@

 namespace fastdeploy {

+/*! @brief Option object to configure GraphCore IPU
+ */
 struct IpuOption {
+  /// IPU device id (NOTE(review): the name suggests a device count; confirm)
  int ipu_device_num;
+  /// the batch size in the graph, only works when graph has no batch shape info
  int ipu_micro_batch_size;
+  /// enable pipelining
  bool ipu_enable_pipelining;
+  /// the number of batches per run in pipelining
  int ipu_batches_per_step;
+  /// enable fp16
  bool ipu_enable_fp16;
+  /// the number of graph replicas
  int ipu_replica_num;
+  /// the available memory proportion for matmul/conv
  float ipu_available_memory_proportion;
+  /// enable fp16 partial for matmul, only works with fp16
  bool ipu_enable_half_partial;
 };

+/*! @brief Option object to configure Paddle Inference backend
+ */
 struct PaddleBackendOption {
+  /// Print log information while initializing Paddle Inference backend
+  bool enable_log_info = false;
+  /// Enable MKLDNN while running inference on CPU
+  bool enable_mkldnn = true;
+  /// Use Paddle Inference + TensorRT to run model inference on GPU
+  bool enable_trt = false;
+
+  /*!
+   * @brief IPU option; configures the IPU hardware when running inference on IPU
+   */
+  IpuOption ipu_option;
+
+  /// Collect shape for model while enable_trt is true
+  bool collect_trt_shape = false;
+  /// Cache input shape for mkldnn while the input data will change dynamically
+  int mkldnn_cache_size = -1;
+  /// Initial memory size (MB) for GPU
+  int gpu_mem_init_size = 100;
+  /// Append every name in `ops` to the end of trt_disabled_ops_
+  void DisableTrtOps(const std::vector<std::string>& ops) {
+    trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
+  }
+  /// Append `pass_name` to delete_pass_names
+  void DeletePass(const std::string& pass_name) {
+    delete_pass_names.push_back(pass_name);
+  }
+
+  // The parameters below may be removed, please do not
+  // read or write them directly
+  TrtBackendOption trt_option;
+  bool enable_pinned_memory = false;
+  void* external_stream_ = nullptr;
+  Device device = Device::CPU;
+  int device_id = 0;
+  std::vector<std::string> trt_disabled_ops_{};
+  int cpu_thread_num = 8;
+  std::vector<std::string> delete_pass_names = {};
  std::string model_file = "";   // Path of model file
  std::string params_file = "";  // Path of parameters file, can be empty

  // load model and paramters from memory
  bool model_from_memory_ = false;
-
-#ifdef WITH_GPU
-  bool use_gpu = true;
-#else
-  bool use_gpu = false;
-#endif
-  bool enable_mkldnn = true;
-
-  bool enable_log_info = false;
-
-  bool enable_trt = false;
-  TrtBackendOption trt_option;
-  bool collect_shape = false;
-  std::vector<std::string> trt_disabled_ops_{};
-
-#ifdef WITH_IPU
-  bool use_ipu = true;
-  IpuOption ipu_option;
-#else
-  bool use_ipu = false;
-#endif
-
-  int mkldnn_cache_size = 1;
-  int cpu_thread_num = 8;
-  // initialize memory size(MB) for GPU
-  int gpu_mem_init_size = 100;
-  // gpu device id
-  int gpu_id = 0;
-  bool enable_pinned_memory = false;
-  void* external_stream_ = nullptr;
-
-  std::vector<std::string> delete_pass_names = {};
 };
 }  // namespace fastdeploy