diff --git a/cmake/paddle2onnx.cmake b/cmake/paddle2onnx.cmake
index baaac8759..b43eef397 100755
--- a/cmake/paddle2onnx.cmake
+++ b/cmake/paddle2onnx.cmake
@@ -43,7 +43,7 @@ else()
 endif(WIN32)
 
 set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-set(PADDLE2ONNX_VERSION "1.0.5")
+set(PADDLE2ONNX_VERSION "1.0.6")
 if(WIN32)
   set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
   if(NOT CMAKE_CL_64)
diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
old mode 100644
new mode 100755
diff --git a/fastdeploy/runtime/backends/ort/option.h b/fastdeploy/runtime/backends/ort/option.h
old mode 100644
new mode 100755
index 1509fe0bb..155bcf908
--- a/fastdeploy/runtime/backends/ort/option.h
+++ b/fastdeploy/runtime/backends/ort/option.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include "fastdeploy/core/fd_type.h"
+#include "fastdeploy/runtime/enum_variables.h"
 #include
 #include
 #include
@@ -25,18 +26,27 @@ namespace fastdeploy {
 /*! @brief Option object to configure ONNX Runtime backend
  */
 struct OrtBackendOption {
-  /// Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
+  /// Level of graph optimization:
+  /// -1: default (enable all optimization strategies)
+  /// 0: disable all optimizations; 1: enable basic optimizations
+  /// 2: enable extended optimizations; 99: enable all
   int graph_optimization_level = -1;
   /// Number of threads to execute the operator, -1: default
   int intra_op_num_threads = -1;
-  /// Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
+  /// Number of threads to execute the graph, -1: default.
+  /// This parameter only takes effect when
+  /// `OrtBackendOption::execution_mode` is set to 1.
   int inter_op_num_threads = -1;
-  /// Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
+  /// Execution mode for the graph, -1: default (sequential mode)
+  /// 0: sequential mode, execute the operators in the graph one by one
+  /// 1: parallel mode, execute the operators in the graph in parallel
   int execution_mode = -1;
-  // Inference device, OrtBackend supports CPU/GPU
+  /// Inference device, OrtBackend supports CPU/GPU
   Device device = Device::CPU;
-  // Inference device id
+  /// Inference device id
   int device_id = 0;
   void* external_stream_ = nullptr;
+  /// Whether to use FP16 for inference
+  bool enable_fp16 = false;
 };
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/ort/option_pybind.cc b/fastdeploy/runtime/backends/ort/option_pybind.cc
index 4b8f47975..15ef2eeb0 100644
--- a/fastdeploy/runtime/backends/ort/option_pybind.cc
+++ b/fastdeploy/runtime/backends/ort/option_pybind.cc
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/pybind/main.h"
 #include "fastdeploy/runtime/backends/ort/option.h"
+#include "fastdeploy/pybind/main.h"
 
 namespace fastdeploy {
 
@@ -28,7 +28,8 @@ void BindOrtOption(pybind11::module& m) {
                      &OrtBackendOption::inter_op_num_threads)
       .def_readwrite("execution_mode", &OrtBackendOption::execution_mode)
       .def_readwrite("device", &OrtBackendOption::device)
-      .def_readwrite("device_id", &OrtBackendOption::device_id);
+      .def_readwrite("device_id", &OrtBackendOption::device_id)
+      .def_readwrite("enable_fp16", &OrtBackendOption::enable_fp16);
 }
 
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/ort/ort_backend.cc b/fastdeploy/runtime/backends/ort/ort_backend.cc
index a8aa1e7cd..ba874409f 100644
--- a/fastdeploy/runtime/backends/ort/ort_backend.cc
+++ b/fastdeploy/runtime/backends/ort/ort_backend.cc
@@ -181,22 +181,21 @@ bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
   strcpy(ops[1].op_name, "pool2d");
   strcpy(ops[1].export_op_name, "AdaptivePool2d");
 
-  if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
-                           params_buffer.c_str(), params_buffer.size(),
-                           &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, ops.data(), 2,
-                           "onnxruntime", nullptr, 0, "", &save_external)) {
+  if (!paddle2onnx::Export(
+          model_buffer.c_str(), model_buffer.size(), params_buffer.c_str(),
+          params_buffer.size(), &model_content_ptr, &model_content_size, 11,
+          true, verbose, true, true, true, ops.data(), 2, "onnxruntime",
+          nullptr, 0, "", &save_external, false)) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
   }
-
   std::string onnx_model_proto(model_content_ptr,
                                model_content_ptr + model_content_size);
   delete[] model_content_ptr;
   model_content_ptr = nullptr;
   if (save_external) {
-    std::string model_file_name = "model.onnx";
+    model_file_name = "model.onnx";
     std::fstream f(model_file_name, std::ios::out);
     FDASSERT(f.is_open(), "Can not open file: %s to save model.",
              model_file_name.c_str());
@@ -219,6 +218,22 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
             << std::endl;
     return false;
   }
+  std::string onnx_model_buffer;
+  if (option.enable_fp16) {
+    if (option.device == Device::CPU) {
+      FDWARNING << "Turning on FP16 on CPU may result in slower inference."
+                << std::endl;
+    }
+    char* model_content_ptr;
+    int model_content_size = 0;
+    paddle2onnx::ConvertFP32ToFP16(model_file.c_str(), model_file.size(),
+                                   &model_content_ptr, &model_content_size);
+    std::string onnx_model_proto(model_content_ptr,
+                                 model_content_ptr + model_content_size);
+    onnx_model_buffer = onnx_model_proto;
+  } else {
+    onnx_model_buffer = model_file;
+  }
   if (!BuildOption(option)) {
     FDERROR << "Create Ort option fail."
             << std::endl;
@@ -226,7 +241,19 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
   }
 
   InitCustomOperators();
-  session_ = {env_, model_file.data(), model_file.size(), session_options_};
+  if (model_file_name.size()) {
+#ifdef WIN32
+    std::wstring widestr =
+        std::wstring(model_file_name.begin(), model_file_name.end());
+    session_ = {env_, widestr.c_str(), session_options_};
+#else
+    session_ = {env_, model_file_name.c_str(), session_options_};
+#endif
+  } else {
+    session_ = {env_, onnx_model_buffer.data(), onnx_model_buffer.size(),
+                session_options_};
+  }
+
   binding_ = std::make_shared<Ort::IoBinding>(session_);
   Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
diff --git a/fastdeploy/runtime/backends/ort/ort_backend.h b/fastdeploy/runtime/backends/ort/ort_backend.h
old mode 100644
new mode 100755
index 543b125e9..4b80d0626
--- a/fastdeploy/runtime/backends/ort/ort_backend.h
+++ b/fastdeploy/runtime/backends/ort/ort_backend.h
@@ -74,6 +74,10 @@ class OrtBackend : public BaseBackend {
   std::shared_ptr<Ort::IoBinding> binding_;
   std::vector<OrtValueInfo> inputs_desc_;
   std::vector<OrtValueInfo> outputs_desc_;
+
+  // The ONNX model file name; set when the exported ONNX model
+  // is larger than 2GB and has to be saved to disk.
+  std::string model_file_name;
 #ifndef NON_64_PLATFORM
   Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
 #endif
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
old mode 100644
new mode 100755
index a1c2d152d..51032bae1
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -14,7 +14,6 @@
 
 /*! \file runtime_option.h
     \brief A brief file description.
-    More details
 */
 
@@ -159,11 +158,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
   RKNPU2BackendOption rknpu2_option;
 
   // \brief Set the profile mode as 'true'.
-  //
-  // \param[in] inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.
+  //
+  // \param[in] inclue_h2d_d2h Whether to
+  // include time of H2D_D2H for time of runtime.
   // \param[in] repeat Repeat times for runtime inference.
   // \param[in] warmup Warmup times for runtime inference.
-  //
+  //
   void EnableProfiling(bool inclue_h2d_d2h = false,
                        int repeat = 100, int warmup = 50) {
     benchmark_option.enable_profile = true;
@@ -178,12 +178,14 @@
     benchmark_option.enable_profile = false;
   }
-  // \brief Enable to check if current backend set by user can be found at valid_xxx_backend.
+  // \brief Enable to check if current backend set by
+  // user can be found at valid_xxx_backend.
   //
   void EnableValidBackendCheck() {
     enable_valid_backend_check = true;
   }
 
-  // \brief Disable to check if current backend set by user can be found at valid_xxx_backend.
+  // \brief Disable to check if current backend set by
+  // user can be found at valid_xxx_backend.
   //
   void DisableValidBackendCheck() {
     enable_valid_backend_check = false;
diff --git a/fastdeploy/vision/facedet/contrib/centerface/postprocessor.h b/fastdeploy/vision/facedet/contrib/centerface/postprocessor.h
index 918b8ab1c..336098134 100644
--- a/fastdeploy/vision/facedet/contrib/centerface/postprocessor.h
+++ b/fastdeploy/vision/facedet/contrib/centerface/postprocessor.h
@@ -64,4 +64,4 @@ class FASTDEPLOY_DECL CenterFacePostprocessor{
 
 }  // namespace facedet
 }  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/facedet/contrib/centerface/preprocessor.h b/fastdeploy/vision/facedet/contrib/centerface/preprocessor.h
index a856306cb..e91811652 100644
--- a/fastdeploy/vision/facedet/contrib/centerface/preprocessor.h
+++ b/fastdeploy/vision/facedet/contrib/centerface/preprocessor.h
@@ -51,7 +51,6 @@ class FASTDEPLOY_DECL CenterFacePreprocessor{
 
   // target size, tuple of (width, height), default size = {640, 640}
   std::vector<int> size_;
-
 };
 
 }  // namespace facedet
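
Usage note for reviewers: a minimal sketch of exercising the new `enable_fp16` flag end to end. It assumes `RuntimeOption` exposes the ORT backend option as a public `ort_option` member (mirroring the `rknpu2_option` member visible in runtime_option.h above); the file name `fp16_demo.cc` and the model path are illustrative, not part of this patch.

// fp16_demo.cc -- illustrative sketch only.
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // "model.onnx" is a placeholder path for an ONNX model.
  option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
  option.UseGpu(0);       // FP16 targets GPU; on CPU the backend prints FDWARNING.
  option.UseOrtBackend();
  option.ort_option.enable_fp16 = true;  // flag added by this patch (assumed member path)

  fastdeploy::Runtime runtime;
  // Init() reaches OrtBackend::InitFromOnnx, which calls
  // paddle2onnx::ConvertFP32ToFP16 when enable_fp16 is set.
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}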