Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 08:37:06 +08:00
[Other] Optimize paddle backend (#1265)
* Optimize paddle backend
* optimize paddle backend
* add version support
@@ -24,54 +24,71 @@
namespace fastdeploy {

/*! @brief Option object to configure GraphCore IPU
 */
struct IpuOption {
  /// IPU device id
  int ipu_device_num;
  /// the batch size in the graph, only work when graph has no batch shape info
  int ipu_micro_batch_size;
  /// enable pipelining
  bool ipu_enable_pipelining;
  /// the number of batches per run in pipelining
  int ipu_batches_per_step;
  /// enable fp16
  bool ipu_enable_fp16;
  /// the number of graph replication
  int ipu_replica_num;
  /// the available memory proportion for matmul/conv
  float ipu_available_memory_proportion;
  /// enable fp16 partial for matmul, only work with fp16
  bool ipu_enable_half_partial;
};

/*! @brief Option object to configure Paddle Inference backend
 */
struct PaddleBackendOption {
  /// Print log information while initialize Paddle Inference backend
  bool enable_log_info = false;
  /// Enable MKLDNN while inference on CPU
  bool enable_mkldnn = true;
  /// Use Paddle Inference + TensorRT to inference model on GPU
  bool enable_trt = false;

  /*
   * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
   */
  IpuOption ipu_option;

  /// Collect shape for model while enable_trt is true
  bool collect_trt_shape = false;
  /// Cache input shape for mkldnn while the input data will change dynamically
  int mkldnn_cache_size = -1;
  /// initialize memory size(MB) for GPU
  int gpu_mem_init_size = 100;

  void DisableTrtOps(const std::vector<std::string>& ops) {
    trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
  }

  void DeletePass(const std::string& pass_name) {
    delete_pass_names.push_back(pass_name);
  }

  // The below parameters may be removed, please do not
  // read or write them directly
  TrtBackendOption trt_option;
  bool enable_pinned_memory = false;
  void* external_stream_ = nullptr;
  Device device = Device::CPU;
  int device_id = 0;
  std::vector<std::string> trt_disabled_ops_{};
  int cpu_thread_num = 8;
  std::vector<std::string> delete_pass_names = {};
  std::string model_file = "";   // Path of model file
  std::string params_file = "";  // Path of parameters file, can be empty

  // load model and parameters from memory
  bool model_from_memory_ = false;
};

The previous PaddleBackendOption members, which this commit removes in favor of the documented options above:

#ifdef WITH_GPU
  bool use_gpu = true;
#else
  bool use_gpu = false;
#endif
  bool enable_mkldnn = true;

  bool enable_log_info = false;

  bool enable_trt = false;
  TrtBackendOption trt_option;
  bool collect_shape = false;
  std::vector<std::string> trt_disabled_ops_{};

#ifdef WITH_IPU
  bool use_ipu = true;
  IpuOption ipu_option;
#else
  bool use_ipu = false;
#endif

  int mkldnn_cache_size = 1;
  int cpu_thread_num = 8;
  // initialize memory size(MB) for GPU
  int gpu_mem_init_size = 100;
  // gpu device id
  int gpu_id = 0;
  bool enable_pinned_memory = false;
  void* external_stream_ = nullptr;

  std::vector<std::string> delete_pass_names = {};
};
}  // namespace fastdeploy
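
The documented PaddleBackendOption members work the same way: a CPU (MKLDNN) or GPU (Paddle Inference + TensorRT) configuration is just a set of assignments. Again a sketch with an assumed include path and illustrative values:

```cpp
#include "fastdeploy/runtime/backends/paddle/option.h"  // path assumed

int main() {
  fastdeploy::PaddleBackendOption opt;

  // CPU side: keep MKLDNN enabled and bound its shape cache, which the header
  // describes as a cache for dynamically changing input shapes.
  opt.enable_mkldnn = true;
  opt.mkldnn_cache_size = 10;

  // GPU side: run supported subgraphs through TensorRT and collect shape
  // information first, as the collect_trt_shape comment suggests.
  opt.enable_trt = true;
  opt.collect_trt_shape = true;
  opt.gpu_mem_init_size = 100;  // initial GPU memory pool size, in MB

  // Print Paddle Inference initialization logs while debugging the setup.
  opt.enable_log_info = true;
  return 0;
}
```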
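
DisableTrtOps and DeletePass simply append to the trt_disabled_ops_ and delete_pass_names vectors; the backend is then expected to keep those ops out of the TensorRT engine and drop the named IR passes when it builds the predictor. The op and pass names below are placeholders, not values taken from this commit:

```cpp
#include "fastdeploy/runtime/backends/paddle/option.h"  // path assumed

int main() {
  fastdeploy::PaddleBackendOption opt;
  opt.enable_trt = true;

  // Exclude these ops from the TensorRT subgraph so Paddle executes them.
  // "some_op" and "another_op" are placeholder names for illustration.
  opt.DisableTrtOps({"some_op", "another_op"});

  // Remove an IR pass from the Paddle Inference pass pipeline by name.
  // "some_fuse_pass" is likewise a placeholder.
  opt.DeletePass("some_fuse_pass");
  return 0;
}
```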