diff --git a/cmake/openvino.cmake b/cmake/openvino.cmake index d2e1f0c7e..d01eddc12 100644 --- a/cmake/openvino.cmake +++ b/cmake/openvino.cmake @@ -18,6 +18,12 @@ if (OPENVINO_DIRECTORY) message(STATUS "Use the openvino lib specified by user. The OpenVINO path: ${OPENVINO_DIRECTORY}") STRING(REGEX REPLACE "\\\\" "/" OPENVINO_DIRECTORY ${OPENVINO_DIRECTORY}) get_openvino_libs(${OPENVINO_DIRECTORY}/runtime) + include_directories(${OPENVINO_DIRECTORY}/runtime/include ${OPENVINO_DIRECTORY}/runtime/include/ie) + set(OPENVINO_LIB_DIR + "${OPENVINO_DIRECTORY}/runtime/lib/intel64" + CACHE PATH "openvino lib directory." FORCE) + set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENVINO_LIB_DIR}") + else() set(OPENVINO_PROJECT "extern_openvino") diff --git a/fastdeploy/runtime/backends/openvino/option.h b/fastdeploy/runtime/backends/openvino/option.h index 1200bd9c7..33108a978 100644 --- a/fastdeploy/runtime/backends/openvino/option.h +++ b/fastdeploy/runtime/backends/openvino/option.h @@ -30,7 +30,13 @@ struct OpenVINOBackendOption { int cpu_thread_num = -1; /// Number of streams while use OpenVINO - int num_streams = 0; + int num_streams = 1; + + /// Affinity mode + std::string affinity = "YES"; + + /// Performance hint mode + std::string hint = "UNDEFINED"; /** * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'.... 
@@ -56,6 +62,39 @@ struct OpenVINOBackendOption { } } + + /** + * @brief Set Affinity mode + */ + void SetAffinity (const std::string& _affinity) { + FDASSERT(_affinity == "YES" || _affinity == "NO" || _affinity == "NUMA" || + _affinity == "HYBRID_AWARE", + "The affinity mode should be one of the list " + "['YES', 'NO', 'NUMA', " + "'HYBRID_AWARE'] "); + affinity = _affinity; + } + + /** + * @brief Set the Performance Hint + */ + void SetPerformanceHint (const std::string& _hint) { + FDASSERT(_hint == "LATENCY" || _hint == "THROUGHPUT" || + _hint == "CUMULATIVE_THROUGHPUT" || _hint == "UNDEFINED", + "The performance hint should be one of the list " + "['LATENCY', 'THROUGHPUT', 'CUMULATIVE_THROUGHPUT', " + "'UNDEFINED'] "); + hint = _hint; + } + + /** + * @brief Set the number of streams + */ + void SetStreamNum (int _num_streams) { + FDASSERT(_num_streams > 0, "The stream_num must be greater than 0."); + num_streams = _num_streams; + } + + std::map<std::string, std::vector<int64_t>> shape_infos; std::set<std::string> cpu_operators{"MulticlassNms"}; }; diff --git a/fastdeploy/runtime/backends/openvino/option_pybind.cc b/fastdeploy/runtime/backends/openvino/option_pybind.cc index ebd069576..f16b7a516 100644 --- a/fastdeploy/runtime/backends/openvino/option_pybind.cc +++ b/fastdeploy/runtime/backends/openvino/option_pybind.cc @@ -22,9 +22,14 @@ void BindOpenVINOOption(pybind11::module& m) { .def(pybind11::init<>()) .def_readwrite("cpu_thread_num", &OpenVINOBackendOption::cpu_thread_num) .def_readwrite("num_streams", &OpenVINOBackendOption::num_streams) + .def_readwrite("affinity", &OpenVINOBackendOption::affinity) + .def_readwrite("hint", &OpenVINOBackendOption::hint) .def("set_device", &OpenVINOBackendOption::SetDevice) .def("set_shape_info", &OpenVINOBackendOption::SetShapeInfo) - .def("set_cpu_operators", &OpenVINOBackendOption::SetCpuOperators); + .def("set_cpu_operators", &OpenVINOBackendOption::SetCpuOperators) + .def("set_affinity", &OpenVINOBackendOption::SetAffinity) + .def("set_performance_hint", 
&OpenVINOBackendOption::SetPerformanceHint) + .def("set_stream_num", &OpenVINOBackendOption::SetStreamNum); } } // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/openvino/ov_backend.cc b/fastdeploy/runtime/backends/openvino/ov_backend.cc index 7f569f92c..97efafdb9 100644 --- a/fastdeploy/runtime/backends/openvino/ov_backend.cc +++ b/fastdeploy/runtime/backends/openvino/ov_backend.cc @@ -203,10 +203,10 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file, } ov::AnyMap properties; - if (option_.device == "CPU" && option_.cpu_thread_num > 0) { - properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; - } - if (option_.device == "CPU") { + if (option_.hint == "UNDEFINED") { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { + properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; + } if (option_.num_streams == -1) { properties["NUM_STREAMS"] = ov::streams::AUTO; } else if (option_.num_streams == -2) { @@ -214,17 +214,32 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file, } else if (option_.num_streams > 0) { properties["NUM_STREAMS"] = option_.num_streams; } - } else { - if (option_.num_streams != 0) { - FDWARNING << "NUM_STREAMS only available on device CPU, currently the " - "device is set as " - << option_.device << ", the NUM_STREAMS will be ignored." - << std::endl; + + FDINFO << "number of streams:" << option_.num_streams << "." << std::endl; + if (option_.affinity == "YES") { + properties["AFFINITY"] = "CORE"; + } else if (option_.affinity == "NO") { + properties["AFFINITY"] = "NONE"; + } else if (option_.affinity == "NUMA") { + properties["AFFINITY"] = "NUMA"; + } else if (option_.affinity == "HYBRID_AWARE") { + properties["AFFINITY"] = "HYBRID_AWARE"; } + FDINFO << "affinity:" << option_.affinity << "." 
<< std::endl; + } else if (option_.hint == "LATENCY") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); + } else if (option_.hint == "THROUGHPUT") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + } else if (option_.hint == "CUMULATIVE_THROUGHPUT") { + properties.emplace(ov::hint::performance_mode( + ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); } FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." << std::endl; + compiled_model_ = core_.compile_model(model, option.device, properties); request_ = compiled_model_.create_infer_request(); @@ -332,10 +347,10 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file, } ov::AnyMap properties; - if (option_.device == "CPU" && option_.cpu_thread_num > 0) { - properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; - } - if (option_.device == "CPU") { + if (option_.hint == "UNDEFINED") { + if (option_.device == "CPU" && option_.cpu_thread_num > 0) { + properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num; + } if (option_.num_streams == -1) { properties["NUM_STREAMS"] = ov::streams::AUTO; } else if (option_.num_streams == -2) { @@ -343,13 +358,27 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file, } else if (option_.num_streams > 0) { properties["NUM_STREAMS"] = option_.num_streams; } - } else { - if (option_.num_streams != 0) { - FDWARNING << "NUM_STREAMS only available on device CPU, currently the " - "device is set as " - << option_.device << ", the NUM_STREAMS will be ignored." - << std::endl; + + FDINFO << "number of streams:" << option_.num_streams << "." 
<< std::endl; + if (option_.affinity == "YES") { + properties["AFFINITY"] = "CORE"; + } else if (option_.affinity == "NO") { + properties["AFFINITY"] = "NONE"; + } else if (option_.affinity == "NUMA") { + properties["AFFINITY"] = "NUMA"; + } else if (option_.affinity == "HYBRID_AWARE") { + properties["AFFINITY"] = "HYBRID_AWARE"; } + FDINFO << "affinity:" << option_.affinity << "." << std::endl; + } else if (option_.hint == "LATENCY") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); + } else if (option_.hint == "THROUGHPUT") { + properties.emplace( + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + } else if (option_.hint == "CUMULATIVE_THROUGHPUT") { + properties.emplace(ov::hint::performance_mode( + ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT)); } FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." @@ -384,7 +413,8 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs, } RUNTIME_PROFILE_LOOP_BEGIN(1) - request_.infer(); + request_.start_async(); + request_.wait(); RUNTIME_PROFILE_LOOP_END outputs->resize(output_infos_.size());