Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
Polish cmake files and runtime apis (#36)
* Add custom operator for onnxruntime and fix paddle backend
* Polish cmake files and runtime apis
* Remove copy libraries
* Fix some issues
* Fix bugs
@@ -36,8 +36,58 @@ bool CheckModelFormat(const std::string& model_file,
Frontend GuessModelFormat(const std::string& model_file);

struct FASTDEPLOY_DECL RuntimeOption {
  Backend backend = Backend::UNKNOWN;
  // Set the paths of the model file and the params file.
  // For an ONNX model, only model_file needs to be set, but model_format
  // must still be set to match it.
  // model_format currently supports 'paddle' / 'onnx'.
  void SetModelPath(const std::string& model_path,
                    const std::string& params_path = "",
                    const std::string& _model_format = "paddle");

  // Set model inference to run on CPU
  void UseCpu();

  // Set model inference to run on GPU
  void UseGpu(int gpu_id = 0);

  // Set the number of threads used for inference on CPU
  void SetCpuThreadNum(int thread_num);

  // Use the Paddle Inference backend
  void UsePaddleBackend();

  // Use the ONNX Runtime backend
  void UseOrtBackend();

  // Use the TensorRT backend
  void UseTrtBackend();

  // Enable MKLDNN while running Paddle Inference on CPU
  void EnablePaddleMKLDNN();
  // Disable MKLDNN while running Paddle Inference on CPU
  void DisablePaddleMKLDNN();

  // Set the shape-cache size used when MKLDNN is enabled with the Paddle Inference backend
  void SetPaddleMKLDNNCacheSize(int size);

  // Set a TensorRT input shape for models with dynamic-shape inputs
  // min_shape: the minimum shape
  // opt_shape: the most common shape during inference, empty by default
  // max_shape: the maximum shape, empty by default
  // If opt_shape and max_shape are empty, they are kept the same as min_shape,
  // which means the shape is fixed to min_shape during inference
  void SetTrtInputShape(
      const std::string& input_name, const std::vector<int32_t>& min_shape,
      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
      const std::vector<int32_t>& max_shape = std::vector<int32_t>());

  // Enable half precision (FP16) while using the TensorRT backend
  void EnableTrtFP16();
  // Disable half precision, falling back to full precision (float32)
  void DisableTrtFP16();

  Backend backend = Backend::UNKNOWN;
  // For CPU inference and preprocessing
  int cpu_thread_num = 8;
  int device_id = 0;
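
Taken together, the methods added in this hunk form the runtime configuration API. The following sketch shows how a RuntimeOption might be filled in for CPU inference with the Paddle Inference backend; the header path, the fastdeploy namespace, and the file names are assumptions for illustration, not part of this commit.

#include "fastdeploy/runtime.h"  // assumed header path

int main() {
  fastdeploy::RuntimeOption option;  // assumed namespace

  // Paddle format needs both the model file and the params file.
  option.SetModelPath("model.pdmodel", "model.pdiparams", "paddle");

  // CPU inference with the Paddle Inference backend and MKLDNN enabled.
  option.UseCpu();
  option.SetCpuThreadNum(8);
  option.UsePaddleBackend();
  option.EnablePaddleMKLDNN();
  option.SetPaddleMKLDNNCacheSize(1);

  // An ONNX model only needs the model file, plus a matching model_format:
  // option.SetModelPath("model.onnx", "", "onnx");
  // option.UseGpu(0);
  // option.UseOrtBackend();
  return 0;
}
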
@@ -62,7 +112,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
  int pd_mkldnn_cache_size = 1;

  // ====== Only for TensorRT backend ======
  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
  std::map<std::string, std::vector<int32_t>> trt_max_shape;
  std::map<std::string, std::vector<int32_t>> trt_min_shape;
  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
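
The trt_* maps above are populated through SetTrtInputShape declared in the first hunk. A minimal sketch of configuring TensorRT dynamic shapes and half precision, assuming an input tensor named "x" and illustrative NCHW shapes:

fastdeploy::RuntimeOption option;  // assumed namespace, as above
option.SetModelPath("model.pdmodel", "model.pdiparams", "paddle");
option.UseGpu(0);
option.UseTrtBackend();

// Dynamic batch size 1..16, most commonly 8; channels and spatial size stay fixed.
option.SetTrtInputShape("x",
                        {1, 3, 224, 224},    // min_shape
                        {8, 3, 224, 224},    // opt_shape
                        {16, 3, 224, 224});  // max_shape

// Omitting opt_shape and max_shape fixes the shape to min_shape:
// option.SetTrtInputShape("x", {1, 3, 224, 224});

option.EnableTrtFP16();  // half precision; DisableTrtFP16() restores float32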