[Other] Optimize runtime module (#1211)

* modify ort option
* update code
* Unify backend
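In short, this commit replaces the flat ort_* fields on RuntimeOption with an
embedded OrtBackendOption (ort_option), swaps the ORT-specific use_gpu/gpu_id
pair for the generic Device device / int device_id, gives BaseBackend a
virtual Init(const RuntimeOption&), and collapses Runtime::CreateOrtBackend to
a single backend_->Init(option) call. A minimal user-side sketch of the
resulting API (the model path and the SetModelPath/UseOrtBackend calls are
illustrative assumptions, not part of this diff):

    #include "fastdeploy/runtime.h"  // assumed include path

    int main() {
      fastdeploy::RuntimeOption option;
      option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
      option.UseOrtBackend();
      option.SetCpuThreadNum(8);             // now also fills ort_option.intra_op_num_threads
      option.ort_option.execution_mode = 1;  // ORT knobs live on the embedded option
      fastdeploy::Runtime runtime;
      runtime.Init(option);  // Runtime hands the whole option to OrtBackend::Init
      return 0;
    }
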
@@ -87,10 +87,6 @@ void BindRuntime(pybind11::module& m) {
       .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
       .def_readwrite("device_id", &RuntimeOption::device_id)
       .def_readwrite("device", &RuntimeOption::device)
-      .def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
-      .def_readwrite("ort_inter_op_num_threads",
-                     &RuntimeOption::ort_inter_op_num_threads)
-      .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
       .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
       .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
       .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)

@@ -56,6 +56,11 @@ class BaseBackend {

   virtual bool Initialized() const { return initialized_; }

+  virtual bool Init(const RuntimeOption& option) {
+    FDERROR << "Not Implement Yet." << std::endl;
+    return false;
+  }
+
   // Get number of inputs of the model
   virtual int NumInputs() const = 0;
   // Get number of outputs of the model

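The default implementation above gives every backend a shared entry point
without forcing an immediate migration; a backend that has been unified
overrides it, as OrtBackend does below. A sketch of the override contract,
using a hypothetical MyBackend (not part of this diff):

    // Hypothetical CPU-only backend illustrating the Init contract:
    // validate the device, then build from the fields of RuntimeOption.
    class MyBackend : public BaseBackend {
     public:
      bool Init(const RuntimeOption& option) override {
        if (option.device != Device::CPU) {
          FDERROR << "MyBackend only supports Device::CPU." << std::endl;
          return false;
        }
        // ... load option.model_file according to option.model_format ...
        initialized_ = true;
        return true;
      }
    };
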
@@ -34,12 +34,8 @@ struct OrtBackendOption {
   // 0: ORT_SEQUENTIAL
   // 1: ORT_PARALLEL
   int execution_mode = -1;
-  bool use_gpu = false;
-  int gpu_id = 0;
+  Device device = Device::CPU;
+  int device_id = 0;
   void* external_stream_ = nullptr;
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
 };
 }  // namespace fastdeploy

@@ -45,7 +45,7 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
   if (option.execution_mode >= 0) {
     session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
   }
-  if (option.use_gpu) {
+  if (option.device == Device::GPU) {
     auto all_providers = Ort::GetAvailableProviders();
     bool support_cuda = false;
     std::string providers_msg = "";

@@ -60,10 +60,10 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
                    "support GPU, the available providers are "
                 << providers_msg << "will fallback to CPUExecutionProvider."
                 << std::endl;
-      option_.use_gpu = false;
+      option_.device = Device::CPU;
     } else {
       OrtCUDAProviderOptions cuda_options;
-      cuda_options.device_id = option.gpu_id;
+      cuda_options.device_id = option.device_id;
       if (option.external_stream_) {
         cuda_options.has_user_compute_stream = 1;
         cuda_options.user_compute_stream = option.external_stream_;

@@ -73,6 +73,44 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
   }
 }

+bool OrtBackend::Init(const RuntimeOption& option) {
+  if (option.device != Device::CPU && option.device != Device::GPU) {
+    FDERROR
+        << "Backend::ORT only supports Device::CPU/Device::GPU, but now its "
+        << option.device << "." << std::endl;
+    return false;
+  }
+  OrtBackendOption ort_option = option.ort_option;
+  ort_option.device = option.device;
+  ort_option.device_id = option.device_id;
+  ort_option.external_stream_ = option.external_stream_;
+
+  if (option.model_format == ModelFormat::PADDLE) {
+    if (option.model_from_memory_) {
+      return InitFromPaddle(option.model_file, option.params_file, ort_option);
+    }
+    std::string model_buffer, params_buffer;
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+             "Failed to read model file.");
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+             "Failed to read parameters file.");
+    return InitFromPaddle(model_buffer, params_buffer, ort_option);
+  } else if (option.model_format == ModelFormat::ONNX) {
+    if (option.model_from_memory_) {
+      return InitFromOnnx(option.model_file, ort_option);
+    }
+    std::string model_buffer;
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+             "Failed to read model file.");
+    return InitFromOnnx(model_buffer, ort_option);
+  } else {
+    FDERROR << "Only support Paddle/ONNX model format for OrtBackend."
+            << std::endl;
+    return false;
+  }
+  return false;
+}
+
 bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
                                 const std::string& params_buffer,
                                 const OrtBackendOption& option, bool verbose) {

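Two details worth noting in OrtBackend::Init: model_file doubles as the
serialized model buffer when model_from_memory_ is set, so the in-memory and
on-disk paths converge on the same InitFromPaddle/InitFromOnnx calls, and the
trailing return false is unreachable since every branch of the if/else chain
already returns. Driving the backend directly (normally Runtime does this)
would look roughly like the following sketch; the path is illustrative:

    fastdeploy::RuntimeOption option;
    option.model_file = "model.onnx";  // assumed public field, as read in Init above
    option.model_format = fastdeploy::ModelFormat::ONNX;
    auto backend = fastdeploy::utils::make_unique<fastdeploy::OrtBackend>();
    if (!backend->Init(option)) {
      FDERROR << "Failed to initialize Backend::ORT." << std::endl;
    }
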
@@ -221,7 +259,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,

   // from FDTensor to Ort Inputs
   for (size_t i = 0; i < inputs.size(); ++i) {
-    auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
+    auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU);
     binding_->BindInput(inputs[i].name.c_str(), ort_value);
   }

@@ -297,7 +335,7 @@ void OrtBackend::InitCustomOperators() {
   if (custom_operators_.size() == 0) {
     MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{};
     custom_operators_.push_back(multiclass_nms);
-    if (option_.use_gpu) {
+    if (option_.device == Device::GPU) {
       AdaptivePool2dOp* adaptive_pool2d =
           new AdaptivePool2dOp{"CUDAExecutionProvider"};
       custom_operators_.push_back(adaptive_pool2d);

@@ -39,13 +39,7 @@ class OrtBackend : public BaseBackend {

   void BuildOption(const OrtBackendOption& option);

-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const OrtBackendOption& option = OrtBackendOption(),
-                      bool verbose = false);
-
-  bool InitFromOnnx(const std::string& model_buffer,
-                    const OrtBackendOption& option = OrtBackendOption());
+  bool Init(const RuntimeOption& option);

   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;

@@ -62,6 +56,14 @@ class OrtBackend : public BaseBackend {
   void InitCustomOperators();

  private:
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const OrtBackendOption& option = OrtBackendOption(),
+                      bool verbose = false);
+
+  bool InitFromOnnx(const std::string& model_buffer,
+                    const OrtBackendOption& option = OrtBackendOption());
+
   Ort::Env env_;
   Ort::Session session_{nullptr};
   Ort::SessionOptions session_options_;

@@ -341,53 +341,9 @@ void Runtime::CreateOpenVINOBackend() {
 }

 void Runtime::CreateOrtBackend() {
-  FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-           "Backend::ORT only supports Device::CPU/Device::GPU.");
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
 #ifdef ENABLE_ORT_BACKEND
-  auto ort_option = OrtBackendOption();
-  ort_option.graph_optimization_level = option.ort_graph_opt_level;
-  ort_option.intra_op_num_threads = option.cpu_thread_num;
-  ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
-  ort_option.execution_mode = option.ort_execution_mode;
-  ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  ort_option.gpu_id = option.device_id;
-  ort_option.external_stream_ = option.external_stream_;
   backend_ = utils::make_unique<OrtBackend>();
-  auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
-  if (option.model_format == ModelFormat::ONNX) {
-    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
-               "Load model from ONNX failed while initliazing OrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(casted_backend->InitFromOnnx(model_buffer, ort_option),
-               "Load model from ONNX failed while initliazing OrtBackend.");
-    }
-  } else {
-    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                              option.params_file, ort_option),
-               "Load model from Paddle failed while initliazing OrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      std::string params_buffer = "";
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
-               "Fail to read binary from parameter file");
-      FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
-                                              ort_option),
-               "Load model from Paddle failed while initliazing OrtBackend.");
-    }
-  }
+  FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT.");
 #else
   FDASSERT(false,
            "OrtBackend is not available, please compiled with "

@@ -97,6 +97,7 @@ void RuntimeOption::SetCpuThreadNum(int thread_num) {
   FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
   cpu_thread_num = thread_num;
   paddle_lite_option.threads = thread_num;
+  ort_option.intra_op_num_threads = thread_num;
 }

 void RuntimeOption::SetOrtGraphOptLevel(int level) {

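Because ort_option now lives inside RuntimeOption, SetCpuThreadNum can keep
the per-backend thread counts in sync in one place; field names below follow
the hunks above:

    fastdeploy::RuntimeOption option;
    option.SetCpuThreadNum(8);
    // After the call:
    //   option.cpu_thread_num == 8
    //   option.paddle_lite_option.threads == 8
    //   option.ort_option.intra_op_num_threads == 8
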
@@ -104,7 +105,7 @@ void RuntimeOption::SetOrtGraphOptLevel(int level) {
   auto valid_level = std::find(supported_level.begin(), supported_level.end(),
                                level) != supported_level.end();
   FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
-  ort_graph_opt_level = level;
+  ort_option.graph_optimization_level = level;
 }

 // use paddle inference backend

@@ -360,14 +360,7 @@ struct FASTDEPLOY_DECL RuntimeOption {

   bool enable_pinned_memory = false;

-  // ======Only for ORT Backend========
-  // -1 means use default value by ort
-  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
-  // ORT_ENABLE_ALL
-  int ort_graph_opt_level = -1;
-  int ort_inter_op_num_threads = -1;
-  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
-  int ort_execution_mode = -1;
+  OrtBackendOption ort_option;

   // ======Only for Paddle Backend=====
   bool pd_enable_mkldnn = true;
