From a711f99c694b08c509c9c14ab32f90596eda06da Mon Sep 17 00:00:00 2001 From: Jason Date: Thu, 2 Feb 2023 13:10:40 +0800 Subject: [PATCH] [Other] Optimize runtime module (#1211) * modify ort option * update code * Unify backend --- fastdeploy/pybind/runtime.cc | 4 -- fastdeploy/runtime/backends/backend.h | 5 ++ fastdeploy/runtime/backends/ort/option.h | 8 +--- .../runtime/backends/ort/ort_backend.cc | 48 +++++++++++++++++-- fastdeploy/runtime/backends/ort/ort_backend.h | 16 ++++--- fastdeploy/runtime/runtime.cc | 46 +----------------- fastdeploy/runtime/runtime_option.cc | 3 +- fastdeploy/runtime/runtime_option.h | 9 +--- 8 files changed, 63 insertions(+), 76 deletions(-) diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index bdd3f61f8..6c5c65bc2 100644 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -87,10 +87,6 @@ void BindRuntime(pybind11::module& m) { .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num) .def_readwrite("device_id", &RuntimeOption::device_id) .def_readwrite("device", &RuntimeOption::device) - .def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level) - .def_readwrite("ort_inter_op_num_threads", - &RuntimeOption::ort_inter_op_num_threads) - .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode) .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape) .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape) .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape) diff --git a/fastdeploy/runtime/backends/backend.h b/fastdeploy/runtime/backends/backend.h index 5fc6b5d7f..88a8e78a0 100644 --- a/fastdeploy/runtime/backends/backend.h +++ b/fastdeploy/runtime/backends/backend.h @@ -56,6 +56,11 @@ class BaseBackend { virtual bool Initialized() const { return initialized_; } + virtual bool Init(const RuntimeOption& option) { + FDERROR << "Not Implement Yet." 
<< std::endl; + return false; + } + // Get number of inputs of the model virtual int NumInputs() const = 0; // Get number of outputs of the model diff --git a/fastdeploy/runtime/backends/ort/option.h b/fastdeploy/runtime/backends/ort/option.h index 78f117b99..ca4d3254c 100644 --- a/fastdeploy/runtime/backends/ort/option.h +++ b/fastdeploy/runtime/backends/ort/option.h @@ -34,12 +34,8 @@ struct OrtBackendOption { // 0: ORT_SEQUENTIAL // 1: ORT_PARALLEL int execution_mode = -1; - bool use_gpu = false; - int gpu_id = 0; + Device device = Device::CPU; + int device_id = 0; void* external_stream_ = nullptr; - - // inside parameter, maybe remove next version - bool remove_multiclass_nms_ = false; - std::map custom_op_info_; }; } // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/ort/ort_backend.cc b/fastdeploy/runtime/backends/ort/ort_backend.cc index 219cce3cf..70cb18121 100644 --- a/fastdeploy/runtime/backends/ort/ort_backend.cc +++ b/fastdeploy/runtime/backends/ort/ort_backend.cc @@ -45,7 +45,7 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) { if (option.execution_mode >= 0) { session_options_.SetExecutionMode(ExecutionMode(option.execution_mode)); } - if (option.use_gpu) { + if (option.device == Device::GPU) { auto all_providers = Ort::GetAvailableProviders(); bool support_cuda = false; std::string providers_msg = ""; @@ -60,10 +60,10 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) { "support GPU, the available providers are " << providers_msg << "will fallback to CPUExecutionProvider." 
<< std::endl; - option_.use_gpu = false; + option_.device = Device::CPU; } else { OrtCUDAProviderOptions cuda_options; - cuda_options.device_id = option.gpu_id; + cuda_options.device_id = option.device_id; if (option.external_stream_) { cuda_options.has_user_compute_stream = 1; cuda_options.user_compute_stream = option.external_stream_; @@ -73,6 +73,44 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) { } } +bool OrtBackend::Init(const RuntimeOption& option) { + if (option.device != Device::CPU && option.device != Device::GPU) { + FDERROR + << "Backend::ORT only supports Device::CPU/Device::GPU, but now its " + << option.device << "." << std::endl; + return false; + } + OrtBackendOption ort_option = option.ort_option; + ort_option.device = option.device; + ort_option.device_id = option.device_id; + ort_option.external_stream_ = option.external_stream_; + + if (option.model_format == ModelFormat::PADDLE) { + if (option.model_from_memory_) { + return InitFromPaddle(option.model_file, option.params_file, ort_option); + } + std::string model_buffer, params_buffer; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Failed to read model file."); + FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), + "Failed to read parameters file."); + return InitFromPaddle(model_buffer, params_buffer, ort_option); + } else if (option.model_format == ModelFormat::ONNX) { + if (option.model_from_memory_) { + return InitFromOnnx(option.model_file, ort_option); + } + std::string model_buffer; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Failed to read model file."); + return InitFromOnnx(model_buffer, ort_option); + } else { + FDERROR << "Only support Paddle/ONNX model format for OrtBackend."
+ << std::endl; + return false; + } + return false; +} + bool OrtBackend::InitFromPaddle(const std::string& model_buffer, const std::string& params_buffer, const OrtBackendOption& option, bool verbose) { @@ -221,7 +259,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs, // from FDTensor to Ort Inputs for (size_t i = 0; i < inputs.size(); ++i) { - auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu); + auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU); binding_->BindInput(inputs[i].name.c_str(), ort_value); } @@ -297,7 +335,7 @@ void OrtBackend::InitCustomOperators() { if (custom_operators_.size() == 0) { MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{}; custom_operators_.push_back(multiclass_nms); - if (option_.use_gpu) { + if (option_.device == Device::GPU) { AdaptivePool2dOp* adaptive_pool2d = new AdaptivePool2dOp{"CUDAExecutionProvider"}; custom_operators_.push_back(adaptive_pool2d); diff --git a/fastdeploy/runtime/backends/ort/ort_backend.h b/fastdeploy/runtime/backends/ort/ort_backend.h index f3942dc45..61308b9da 100644 --- a/fastdeploy/runtime/backends/ort/ort_backend.h +++ b/fastdeploy/runtime/backends/ort/ort_backend.h @@ -39,13 +39,7 @@ class OrtBackend : public BaseBackend { void BuildOption(const OrtBackendOption& option); - bool InitFromPaddle(const std::string& model_buffer, - const std::string& params_buffer, - const OrtBackendOption& option = OrtBackendOption(), - bool verbose = false); - - bool InitFromOnnx(const std::string& model_buffer, - const OrtBackendOption& option = OrtBackendOption()); + bool Init(const RuntimeOption& option); bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs, bool copy_to_fd = true) override; @@ -62,6 +56,14 @@ class OrtBackend : public BaseBackend { void InitCustomOperators(); private: + bool InitFromPaddle(const std::string& model_buffer, + const std::string& params_buffer, + const OrtBackendOption& option = OrtBackendOption(), + bool verbose = false); + + bool InitFromOnnx(const
std::string& model_buffer, + const OrtBackendOption& option = OrtBackendOption()); + Ort::Env env_; Ort::Session session_{nullptr}; Ort::SessionOptions session_options_; diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc index e5df860e7..f0347805d 100644 --- a/fastdeploy/runtime/runtime.cc +++ b/fastdeploy/runtime/runtime.cc @@ -341,53 +341,9 @@ void Runtime::CreateOpenVINOBackend() { } void Runtime::CreateOrtBackend() { - FDASSERT(option.device == Device::CPU || option.device == Device::GPU, - "Backend::ORT only supports Device::CPU/Device::GPU."); - FDASSERT(option.model_format == ModelFormat::PADDLE || - option.model_format == ModelFormat::ONNX, - "OrtBackend only support model format of ModelFormat::PADDLE / " - "ModelFormat::ONNX."); #ifdef ENABLE_ORT_BACKEND - auto ort_option = OrtBackendOption(); - ort_option.graph_optimization_level = option.ort_graph_opt_level; - ort_option.intra_op_num_threads = option.cpu_thread_num; - ort_option.inter_op_num_threads = option.ort_inter_op_num_threads; - ort_option.execution_mode = option.ort_execution_mode; - ort_option.use_gpu = (option.device == Device::GPU) ? 
true : false; - ort_option.gpu_id = option.device_id; - ort_option.external_stream_ = option.external_stream_; backend_ = utils::make_unique<OrtBackend>(); - auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get()); - if (option.model_format == ModelFormat::ONNX) { - if (option.model_from_memory_) { - FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option), - "Load model from ONNX failed while initliazing OrtBackend."); - ReleaseModelMemoryBuffer(); - } else { - std::string model_buffer = ""; - FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), - "Fail to read binary from model file"); - FDASSERT(casted_backend->InitFromOnnx(model_buffer, ort_option), - "Load model from ONNX failed while initliazing OrtBackend."); - } - } else { - if (option.model_from_memory_) { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, ort_option), - "Load model from Paddle failed while initliazing OrtBackend."); - ReleaseModelMemoryBuffer(); - } else { - std::string model_buffer = ""; - std::string params_buffer = ""; - FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), - "Fail to read binary from model file"); - FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), - "Fail to read binary from parameter file"); - FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer, - ort_option), - "Load model from Paddle failed while initliazing OrtBackend."); - } - } + FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT."); #else FDASSERT(false, "OrtBackend is not available, please compiled with " diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc index 29e94c1dd..0798786b9 100644 --- a/fastdeploy/runtime/runtime_option.cc +++ b/fastdeploy/runtime/runtime_option.cc @@ -97,6 +97,7 @@ void RuntimeOption::SetCpuThreadNum(int thread_num) { FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); cpu_thread_num = thread_num; paddle_lite_option.threads = thread_num; +
ort_option.intra_op_num_threads = thread_num; } void RuntimeOption::SetOrtGraphOptLevel(int level) { @@ -104,7 +105,7 @@ void RuntimeOption::SetOrtGraphOptLevel(int level) { auto valid_level = std::find(supported_level.begin(), supported_level.end(), level) != supported_level.end(); FDASSERT(valid_level, "The level must be -1, 0, 1, 2."); - ort_graph_opt_level = level; + ort_option.graph_optimization_level = level; } // use paddle inference backend diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h index 8681b0c9c..3aea02e0d 100644 --- a/fastdeploy/runtime/runtime_option.h +++ b/fastdeploy/runtime/runtime_option.h @@ -360,14 +360,7 @@ struct FASTDEPLOY_DECL RuntimeOption { bool enable_pinned_memory = false; - // ======Only for ORT Backend======== - // -1 means use default value by ort - // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3: - // ORT_ENABLE_ALL - int ort_graph_opt_level = -1; - int ort_inter_op_num_threads = -1; - // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL - int ort_execution_mode = -1; + OrtBackendOption ort_option; // ======Only for Paddle Backend===== bool pd_enable_mkldnn = true;