From 7c9bf11c44ad4fd2b7d2d5fabf6e87cf2cd1acbd Mon Sep 17 00:00:00 2001
From: Jason
Date: Tue, 7 Feb 2023 10:18:03 +0800
Subject: [PATCH] [Other] Optimize Poros backend (#1232)

* Optimize Poros backend
* fix error
* Add more pybind
* fix conflicts
* add some deprecation notices
---
 cmake/poros.cmake | 9 +-
 .../runtime/python/infer_torchscript_poros.py | 1 -
 fastdeploy/pybind/runtime.cc | 102 +------------
 fastdeploy/runtime/backends/lite/option.h | 6 +-
 .../runtime/backends/lite/option_pybind.cc | 63 ++++++++
 .../backends/openvino/option_pybind.cc | 30 ++++
 .../runtime/backends/ort/option_pybind.cc | 34 +++++
 fastdeploy/runtime/backends/poros/option.h | 8 +-
 .../runtime/backends/poros/option_pybind.cc | 37 +++++
 .../runtime/backends/poros/poros_backend.cc | 80 +---------
 .../runtime/backends/poros/poros_backend.h | 7 -
 fastdeploy/runtime/option_pybind.cc | 129 ++++++++++++++++
 fastdeploy/runtime/runtime.cc | 25 ++-
 fastdeploy/runtime/runtime_option.h | 7 +-
 python/fastdeploy/runtime.py | 142 +++++++++---------
 15 files changed, 397 insertions(+), 283 deletions(-)
 create mode 100644 fastdeploy/runtime/backends/lite/option_pybind.cc
 create mode 100644 fastdeploy/runtime/backends/openvino/option_pybind.cc
 create mode 100644 fastdeploy/runtime/backends/ort/option_pybind.cc
 create mode 100644 fastdeploy/runtime/backends/poros/option_pybind.cc
 create mode 100644 fastdeploy/runtime/option_pybind.cc
 mode change 100755 => 100644 python/fastdeploy/runtime.py

diff --git a/cmake/poros.cmake b/cmake/poros.cmake
index 4b206d84a..63ec7c72f 100755
--- a/cmake/poros.cmake
+++ b/cmake/poros.cmake
@@ -13,6 +13,10 @@
 # limitations under the License.
 include(ExternalProject)
 
+if(NOT ENABLE_TRT_BACKEND)
+  message(FATAL_ERROR "ENABLE_POROS_BACKEND=ON requires ENABLE_TRT_BACKEND=ON, but it is currently OFF.")
+endif()
+
 set(POROS_PROJECT "extern_poros")
 set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
 set(POROS_SOURCE_DIR
@@ -48,9 +52,10 @@ else()
   if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
     message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
   else()
-    message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
     if(WITH_GPU)
       set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz")
+    else()
+      message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
     endif()
   endif()
 endif()
@@ -77,7 +82,7 @@ add_dependencies(external_poros ${POROS_PROJECT})
 # Download libtorch.so with ABI=1
 set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
 set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip")
-set(TROCH_URL "${TORCH_URL_BASE}${TORCH_FILE}")
+set(TORCH_URL "${TORCH_URL_BASE}${TORCH_FILE}")
 message(STATUS "Use the default Torch lib from: ${TORCH_URL}")
 download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install)
 if(EXISTS ${THIRD_PARTY_PATH}/install/torch)
diff --git a/examples/runtime/python/infer_torchscript_poros.py b/examples/runtime/python/infer_torchscript_poros.py
index de31061f0..974d41ac0 100644
--- a/examples/runtime/python/infer_torchscript_poros.py
+++ b/examples/runtime/python/infer_torchscript_poros.py
@@ -51,7 +51,6 @@ if __name__ == '__main__':
     option.use_poros_backend()
     option.set_model_path(
         "std_resnet50_script.pt", model_format=ModelFormat.TORCHSCRIPT)
-    option.is_dynamic = True
     # compile
     runtime = fd.Runtime(option)
     runtime.compile(prewarm_datas)
diff --git a/fastdeploy/pybind/runtime.cc 
b/fastdeploy/pybind/runtime.cc index 7eeb0fdc2..ca2f4886b 100644 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -16,106 +16,10 @@ namespace fastdeploy { +void BindOption(pybind11::module& m); + void BindRuntime(pybind11::module& m) { - pybind11::class_(m, "RuntimeOption") - .def(pybind11::init()) - .def("set_model_path", &RuntimeOption::SetModelPath) - .def("set_model_buffer", &RuntimeOption::SetModelBuffer) - .def("use_gpu", &RuntimeOption::UseGpu) - .def("use_cpu", &RuntimeOption::UseCpu) - .def("use_rknpu2", &RuntimeOption::UseRKNPU2) - .def("use_sophgo", &RuntimeOption::UseSophgo) - .def("use_ascend", &RuntimeOption::UseAscend) - .def("use_kunlunxin", &RuntimeOption::UseKunlunXin) - .def("set_external_stream", &RuntimeOption::SetExternalStream) - .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) - .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) - .def("use_poros_backend", &RuntimeOption::UsePorosBackend) - .def("use_ort_backend", &RuntimeOption::UseOrtBackend) - .def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel) - .def("use_trt_backend", &RuntimeOption::UseTrtBackend) - .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend) - .def("use_lite_backend", &RuntimeOption::UseLiteBackend) - .def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames) - .def("set_lite_context_properties", - &RuntimeOption::SetLiteContextProperties) - .def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir) - .def("set_lite_dynamic_shape_info", - &RuntimeOption::SetLiteDynamicShapeInfo) - .def("set_lite_subgraph_partition_path", - &RuntimeOption::SetLiteSubgraphPartitionPath) - .def("set_lite_mixed_precision_quantization_config_path", - &RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath) - .def("set_lite_subgraph_partition_config_buffer", - &RuntimeOption::SetLiteSubgraphPartitionConfigBuffer) - .def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN) - .def("set_openvino_device", &RuntimeOption::SetOpenVINODevice) - .def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo) - .def("set_openvino_cpu_operators", - &RuntimeOption::SetOpenVINOCpuOperators) - .def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo) - .def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo) - .def("set_paddle_mkldnn_cache_size", - &RuntimeOption::SetPaddleMKLDNNCacheSize) - .def("enable_lite_fp16", &RuntimeOption::EnableLiteFP16) - .def("disable_lite_fp16", &RuntimeOption::DisableLiteFP16) - .def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode) - .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape) - .def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize) - .def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize) - .def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt) - .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16) - .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16) - .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile) - .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory) - .def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory) - .def("enable_paddle_trt_collect_shape", - &RuntimeOption::EnablePaddleTrtCollectShape) - .def("disable_paddle_trt_collect_shape", - &RuntimeOption::DisablePaddleTrtCollectShape) - .def("use_ipu", &RuntimeOption::UseIpu) - .def("set_ipu_config", &RuntimeOption::SetIpuConfig) - .def("delete_paddle_backend_pass", - &RuntimeOption::DeletePaddleBackendPass) - 
.def("enable_profiling", &RuntimeOption::EnableProfiling) - .def("disable_profiling", &RuntimeOption::DisableProfiling) - .def("disable_paddle_trt_ops", &RuntimeOption::DisablePaddleTrtOPs) - .def_readwrite("model_file", &RuntimeOption::model_file) - .def_readwrite("params_file", &RuntimeOption::params_file) - .def_readwrite("model_format", &RuntimeOption::model_format) - .def_readwrite("backend", &RuntimeOption::backend) - .def_readwrite("external_stream", &RuntimeOption::external_stream_) - .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_) - .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num) - .def_readwrite("device_id", &RuntimeOption::device_id) - .def_readwrite("device", &RuntimeOption::device) - .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape) - .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape) - .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape) - .def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file) - .def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16) - .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8) - .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size) - .def_readwrite("trt_max_workspace_size", - &RuntimeOption::trt_max_workspace_size) - .def_readwrite("is_dynamic", &RuntimeOption::is_dynamic) - .def_readwrite("long_to_int", &RuntimeOption::long_to_int) - .def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32) - .def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres) - .def_readwrite("poros_file", &RuntimeOption::poros_file) - .def_readwrite("ipu_device_num", &RuntimeOption::ipu_device_num) - .def_readwrite("ipu_micro_batch_size", - &RuntimeOption::ipu_micro_batch_size) - .def_readwrite("ipu_enable_pipelining", - &RuntimeOption::ipu_enable_pipelining) - .def_readwrite("ipu_batches_per_step", - &RuntimeOption::ipu_batches_per_step) - .def_readwrite("ipu_enable_fp16", &RuntimeOption::ipu_enable_fp16) - .def_readwrite("ipu_replica_num", &RuntimeOption::ipu_replica_num) - .def_readwrite("ipu_available_memory_proportion", - &RuntimeOption::ipu_available_memory_proportion) - .def_readwrite("ipu_enable_half_partial", - &RuntimeOption::ipu_enable_half_partial); + BindOption(m); pybind11::class_(m, "TensorInfo") .def_readwrite("name", &TensorInfo::name) diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h index d94b32251..1ffd01385 100755 --- a/fastdeploy/runtime/backends/lite/option.h +++ b/fastdeploy/runtime/backends/lite/option.h @@ -21,9 +21,7 @@ // FastDepoy static library, default OFF. These messages // are only reserve for debugging. #if defined(WITH_STATIC_WARNING) -#warning You are using the FastDeploy static library. \ -We will automatically add some registration codes for \ -ops, kernels and passes for Paddle Lite. +#warning You are using the FastDeploy static library. We will automatically add some registration codes for ops, kernels and passes for Paddle Lite. // NOLINT #endif #if !defined(WITH_STATIC_LIB_AT_COMPILING) #include "paddle_use_ops.h" // NOLINT @@ -52,7 +50,7 @@ enum LitePowerMode { struct LiteBackendOption { /// Paddle Lite power mode for mobile device. 
- LitePowerMode power_mode = LITE_POWER_NO_BIND; + int power_mode = 3; /// Number of threads while use CPU int cpu_threads = 1; /// Enable use half precision diff --git a/fastdeploy/runtime/backends/lite/option_pybind.cc b/fastdeploy/runtime/backends/lite/option_pybind.cc new file mode 100644 index 000000000..543255aaf --- /dev/null +++ b/fastdeploy/runtime/backends/lite/option_pybind.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" +#include "fastdeploy/runtime/backends/lite/option.h" + +namespace fastdeploy { + +void BindLiteOption(pybind11::module& m) { + pybind11::class_(m, "LiteBackendOption") + .def(pybind11::init()) + .def_readwrite("power_mode", &LiteBackendOption::power_mode) + .def_readwrite("cpu_threads", &LiteBackendOption::cpu_threads) + .def_readwrite("enable_fp16", &LiteBackendOption::enable_fp16) + .def_readwrite("enable_int8", &LiteBackendOption::enable_int8) + .def_readwrite("device", &LiteBackendOption::device) + .def_readwrite("optimized_model_dir", + &LiteBackendOption::optimized_model_dir) + .def_readwrite( + "nnadapter_subgraph_partition_config_path", + &LiteBackendOption::nnadapter_subgraph_partition_config_path) + .def_readwrite( + "nnadapter_subgraph_partition_config_buffer", + &LiteBackendOption::nnadapter_subgraph_partition_config_buffer) + .def_readwrite("nnadapter_context_properties", + &LiteBackendOption::nnadapter_context_properties) + .def_readwrite("nnadapter_model_cache_dir", + &LiteBackendOption::nnadapter_model_cache_dir) + .def_readwrite("nnadapter_mixed_precision_quantization_config_path", + &LiteBackendOption:: + nnadapter_mixed_precision_quantization_config_path) + .def_readwrite("nnadapter_dynamic_shape_info", + &LiteBackendOption::nnadapter_dynamic_shape_info) + .def_readwrite("nnadapter_device_names", + &LiteBackendOption::nnadapter_device_names) + .def_readwrite("device_id", &LiteBackendOption::device_id) + .def_readwrite("kunlunxin_l3_workspace_size", + &LiteBackendOption::kunlunxin_l3_workspace_size) + .def_readwrite("kunlunxin_locked", &LiteBackendOption::kunlunxin_locked) + .def_readwrite("kunlunxin_autotune", + &LiteBackendOption::kunlunxin_autotune) + .def_readwrite("kunlunxin_autotune_file", + &LiteBackendOption::kunlunxin_autotune_file) + .def_readwrite("kunlunxin_precision", + &LiteBackendOption::kunlunxin_precision) + .def_readwrite("kunlunxin_adaptive_seqlen", + &LiteBackendOption::kunlunxin_adaptive_seqlen) + .def_readwrite("kunlunxin_enable_multi_stream", + &LiteBackendOption::kunlunxin_enable_multi_stream); +} + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/openvino/option_pybind.cc b/fastdeploy/runtime/backends/openvino/option_pybind.cc new file mode 100644 index 000000000..ebd069576 --- /dev/null +++ b/fastdeploy/runtime/backends/openvino/option_pybind.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" +#include "fastdeploy/runtime/backends/openvino/option.h" + +namespace fastdeploy { + +void BindOpenVINOOption(pybind11::module& m) { + pybind11::class_(m, "OpenVINOBackendOption") + .def(pybind11::init()) + .def_readwrite("cpu_thread_num", &OpenVINOBackendOption::cpu_thread_num) + .def_readwrite("num_streams", &OpenVINOBackendOption::num_streams) + .def("set_device", &OpenVINOBackendOption::SetDevice) + .def("set_shape_info", &OpenVINOBackendOption::SetShapeInfo) + .def("set_cpu_operators", &OpenVINOBackendOption::SetCpuOperators); +} + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/ort/option_pybind.cc b/fastdeploy/runtime/backends/ort/option_pybind.cc new file mode 100644 index 000000000..4b8f47975 --- /dev/null +++ b/fastdeploy/runtime/backends/ort/option_pybind.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "fastdeploy/pybind/main.h" +#include "fastdeploy/runtime/backends/ort/option.h" + +namespace fastdeploy { + +void BindOrtOption(pybind11::module& m) { + pybind11::class_(m, "OrtBackendOption") + .def(pybind11::init()) + .def_readwrite("graph_optimization_level", + &OrtBackendOption::graph_optimization_level) + .def_readwrite("intra_op_num_threads", + &OrtBackendOption::intra_op_num_threads) + .def_readwrite("inter_op_num_threads", + &OrtBackendOption::inter_op_num_threads) + .def_readwrite("execution_mode", &OrtBackendOption::execution_mode) + .def_readwrite("device", &OrtBackendOption::device) + .def_readwrite("device_id", &OrtBackendOption::device_id); +} + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/poros/option.h b/fastdeploy/runtime/backends/poros/option.h index 2b715f7dc..22f0d371b 100755 --- a/fastdeploy/runtime/backends/poros/option.h +++ b/fastdeploy/runtime/backends/poros/option.h @@ -23,12 +23,8 @@ namespace fastdeploy { struct PorosBackendOption { -#ifdef WITH_GPU - bool use_gpu = true; -#else - bool use_gpu = false; -#endif - int gpu_id = 0; + Device device = Device::CPU; + int device_id = 0; bool long_to_int = true; // There is calculation precision in tf32 mode on A10, it can bring some // performance improvement, but there may be diff diff --git a/fastdeploy/runtime/backends/poros/option_pybind.cc b/fastdeploy/runtime/backends/poros/option_pybind.cc new file mode 100644 index 000000000..b545ea85c --- /dev/null +++ b/fastdeploy/runtime/backends/poros/option_pybind.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" +#include "fastdeploy/runtime/backends/poros/option.h" + +namespace fastdeploy { + +void BindPorosOption(pybind11::module& m) { + pybind11::class_(m, "PorosBackendOption") + .def(pybind11::init()) + .def_readwrite("long_to_int", &PorosBackendOption::long_to_int) + .def_readwrite("use_nvidia_tf32", &PorosBackendOption::use_nvidia_tf32) + .def_readwrite("unconst_ops_thres", + &PorosBackendOption::unconst_ops_thres) + .def_readwrite("prewarm_datatypes", + &PorosBackendOption::prewarm_datatypes) + .def_readwrite("enable_fp16", &PorosBackendOption::enable_fp16) + .def_readwrite("enable_int8", &PorosBackendOption::enable_int8) + .def_readwrite("is_dynamic", &PorosBackendOption::is_dynamic) + .def_readwrite("max_batch_size", &PorosBackendOption::max_batch_size) + .def_readwrite("max_workspace_size", + &PorosBackendOption::max_workspace_size); +} + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/poros/poros_backend.cc b/fastdeploy/runtime/backends/poros/poros_backend.cc index ebe359b3d..64b07dd91 100644 --- a/fastdeploy/runtime/backends/poros/poros_backend.cc +++ b/fastdeploy/runtime/backends/poros/poros_backend.cc @@ -43,11 +43,12 @@ std::vector PorosBackend::GetOutputInfos() { } void PorosBackend::BuildOption(const PorosBackendOption& option) { - _options.device = option.use_gpu ? 
baidu::mirana::poros::Device::GPU - : baidu::mirana::poros::Device::CPU; + _options.device = (option.device == Device::GPU) + ? baidu::mirana::poros::Device::GPU + : baidu::mirana::poros::Device::CPU; _options.long_to_int = option.long_to_int; _options.use_nvidia_tf32 = option.use_nvidia_tf32; - _options.device_id = option.gpu_id; + _options.device_id = option.device_id; _options.unconst_ops_thres = option.unconst_ops_thres; _options.is_dynamic = option.is_dynamic; _options.max_workspace_size = option.max_workspace_size; @@ -67,7 +68,7 @@ bool PorosBackend::Compile(const std::string& model_file, torch::jit::Module mod; mod = torch::jit::load(model_file); mod.eval(); - if (option.use_gpu) { + if (option.device == Device::GPU) { mod.to(at::kCUDA); } else { mod.to(at::kCPU); @@ -79,7 +80,7 @@ bool PorosBackend::Compile(const std::string& model_file, _numinputs = inputs.size() - 1; // FDTensor to at::Tensor std::vector> prewarm_datas; - bool is_backend_cuda = option.use_gpu ? true : false; + bool is_backend_cuda = (option.device == Device::GPU); for (size_t i = 0; i < prewarm_tensors.size(); ++i) { std::vector prewarm_data; for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) { @@ -121,73 +122,6 @@ bool PorosBackend::Compile(const std::string& model_file, return true; } -bool PorosBackend::InitFromTorchScript(const std::string& model_file, - const PorosBackendOption& option) { - if (initialized_) { - FDERROR << "PorosBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } - if (option.poros_file != "") { - std::ifstream fin(option.poros_file, std::ios::binary | std::ios::in); - if (fin) { - FDINFO << "Detect compiled Poros file in " << option.poros_file - << ", will load it directly." << std::endl; - fin.close(); - return InitFromPoros(option.poros_file, option); - } - } - BuildOption(option); - torch::jit::Module mod; - mod = torch::jit::load(model_file); - mod.eval(); - if (option.use_gpu) { - mod.to(at::kCUDA); - } else { - mod.to(at::kCPU); - } - // get inputs_nums and outputs_nums - auto graph = mod.get_method("forward").graph(); - auto inputs = graph->inputs(); - // remove self node - _numinputs = inputs.size() - 1; - auto outputs = graph->outputs(); - _numoutputs = outputs.size(); - _poros_module = baidu::mirana::poros::Compile(mod, _prewarm_datas, _options); - if (_poros_module == nullptr) { - FDERROR << "PorosBackend initlize Failed, try initialize again." - << std::endl; - return false; - } - initialized_ = true; - return true; -} - -bool PorosBackend::InitFromPoros(const std::string& model_file, - const PorosBackendOption& option) { - if (initialized_) { - FDERROR << "PorosBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } - BuildOption(option); - _poros_module = baidu::mirana::poros::Load(model_file, _options); - if (_poros_module == nullptr) { - FDERROR << "PorosBackend initlize Failed, try initialize again." 
-            << std::endl;
-    return false;
-  }
-  // get inputs_nums and outputs_nums
-  auto graph = _poros_module->get_method("forward").graph();
-  auto inputs = graph->inputs();
-  // remove self node
-  _numinputs = inputs.size() - 1;
-  auto outputs = graph->outputs();
-  _numoutputs = outputs.size();
-  initialized_ = true;
-  return true;
-}
-
 bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
                          std::vector<FDTensor>* outputs, bool copy_to_fd) {
   // Convert FD Tensor to PyTorch Tensor
@@ -238,4 +172,4 @@ bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
-}  // namespace fastdeploy
\ No newline at end of file
+}  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/poros/poros_backend.h b/fastdeploy/runtime/backends/poros/poros_backend.h
index 5d15128cf..0d01a6884 100755
--- a/fastdeploy/runtime/backends/poros/poros_backend.h
+++ b/fastdeploy/runtime/backends/poros/poros_backend.h
@@ -51,13 +51,6 @@ class PorosBackend : public BaseBackend {
 
   void BuildOption(const PorosBackendOption& option);
 
-  bool
-  InitFromTorchScript(const std::string& model_file,
-                      const PorosBackendOption& option = PorosBackendOption());
-
-  bool InitFromPoros(const std::string& model_file,
-                     const PorosBackendOption& option = PorosBackendOption());
-
   bool Compile(const std::string& model_file,
                std::vector<std::vector<FDTensor>>& prewarm_tensors,
                const PorosBackendOption& option = PorosBackendOption());
diff --git a/fastdeploy/runtime/option_pybind.cc b/fastdeploy/runtime/option_pybind.cc
new file mode 100644
index 000000000..982d18053
--- /dev/null
+++ b/fastdeploy/runtime/option_pybind.cc
@@ -0,0 +1,129 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { + +void BindLiteOption(pybind11::module& m); +void BindOpenVINOOption(pybind11::module& m); +void BindOrtOption(pybind11::module& m); +void BindPorosOption(pybind11::module& m); + +void BindOption(pybind11::module& m) { + BindLiteOption(m); + BindOpenVINOOption(m); + BindOrtOption(m); + BindPorosOption(m); + + pybind11::class_(m, "RuntimeOption") + .def(pybind11::init()) + .def("set_model_path", &RuntimeOption::SetModelPath) + .def("set_model_buffer", &RuntimeOption::SetModelBuffer) + .def("use_gpu", &RuntimeOption::UseGpu) + .def("use_cpu", &RuntimeOption::UseCpu) + .def("use_rknpu2", &RuntimeOption::UseRKNPU2) + .def("use_sophgo", &RuntimeOption::UseSophgo) + .def("use_ascend", &RuntimeOption::UseAscend) + .def("use_kunlunxin", &RuntimeOption::UseKunlunXin) + .def_readwrite("paddle_lite_option", &RuntimeOption::paddle_lite_option) + .def_readwrite("openvino_option", &RuntimeOption::openvino_option) + .def_readwrite("ort_option", &RuntimeOption::ort_option) + .def_readwrite("poros_option", &RuntimeOption::poros_option) + .def("set_external_stream", &RuntimeOption::SetExternalStream) + .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) + .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) + .def("use_poros_backend", &RuntimeOption::UsePorosBackend) + .def("use_ort_backend", &RuntimeOption::UseOrtBackend) + .def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel) + .def("use_trt_backend", &RuntimeOption::UseTrtBackend) + .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend) + .def("use_lite_backend", &RuntimeOption::UseLiteBackend) + .def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames) + .def("set_lite_context_properties", + &RuntimeOption::SetLiteContextProperties) + .def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir) + .def("set_lite_dynamic_shape_info", + &RuntimeOption::SetLiteDynamicShapeInfo) + .def("set_lite_subgraph_partition_path", + &RuntimeOption::SetLiteSubgraphPartitionPath) + .def("set_lite_mixed_precision_quantization_config_path", + &RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath) + .def("set_lite_subgraph_partition_config_buffer", + &RuntimeOption::SetLiteSubgraphPartitionConfigBuffer) + .def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN) + .def("set_openvino_device", &RuntimeOption::SetOpenVINODevice) + .def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo) + .def("set_openvino_cpu_operators", + &RuntimeOption::SetOpenVINOCpuOperators) + .def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo) + .def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo) + .def("set_paddle_mkldnn_cache_size", + &RuntimeOption::SetPaddleMKLDNNCacheSize) + .def("enable_lite_fp16", &RuntimeOption::EnableLiteFP16) + .def("disable_lite_fp16", &RuntimeOption::DisableLiteFP16) + .def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode) + .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape) + .def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize) + .def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize) + .def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt) + .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16) + .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16) + .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile) + .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory) + .def("disable_pinned_memory", 
&RuntimeOption::DisablePinnedMemory) + .def("enable_paddle_trt_collect_shape", + &RuntimeOption::EnablePaddleTrtCollectShape) + .def("disable_paddle_trt_collect_shape", + &RuntimeOption::DisablePaddleTrtCollectShape) + .def("use_ipu", &RuntimeOption::UseIpu) + .def("set_ipu_config", &RuntimeOption::SetIpuConfig) + .def("delete_paddle_backend_pass", + &RuntimeOption::DeletePaddleBackendPass) + .def("enable_profiling", &RuntimeOption::EnableProfiling) + .def("disable_profiling", &RuntimeOption::DisableProfiling) + .def("disable_paddle_trt_ops", &RuntimeOption::DisablePaddleTrtOPs) + .def_readwrite("model_file", &RuntimeOption::model_file) + .def_readwrite("params_file", &RuntimeOption::params_file) + .def_readwrite("model_format", &RuntimeOption::model_format) + .def_readwrite("backend", &RuntimeOption::backend) + .def_readwrite("external_stream", &RuntimeOption::external_stream_) + .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_) + .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num) + .def_readwrite("device_id", &RuntimeOption::device_id) + .def_readwrite("device", &RuntimeOption::device) + .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape) + .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape) + .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape) + .def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file) + .def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16) + .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8) + .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size) + .def_readwrite("trt_max_workspace_size", + &RuntimeOption::trt_max_workspace_size) + .def_readwrite("ipu_device_num", &RuntimeOption::ipu_device_num) + .def_readwrite("ipu_micro_batch_size", + &RuntimeOption::ipu_micro_batch_size) + .def_readwrite("ipu_enable_pipelining", + &RuntimeOption::ipu_enable_pipelining) + .def_readwrite("ipu_batches_per_step", + &RuntimeOption::ipu_batches_per_step) + .def_readwrite("ipu_enable_fp16", &RuntimeOption::ipu_enable_fp16) + .def_readwrite("ipu_replica_num", &RuntimeOption::ipu_replica_num) + .def_readwrite("ipu_available_memory_proportion", + &RuntimeOption::ipu_available_memory_proportion) + .def_readwrite("ipu_enable_half_partial", + &RuntimeOption::ipu_enable_half_partial); +} +} // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc index 1ed82891a..e7db79127 100644 --- a/fastdeploy/runtime/runtime.cc +++ b/fastdeploy/runtime/runtime.cc @@ -495,25 +495,24 @@ Runtime* Runtime::Clone(void* stream, int device_id) { bool Runtime::Compile(std::vector>& prewarm_tensors, const RuntimeOption& _option) { #ifdef ENABLE_POROS_BACKEND - option = _option; - auto poros_option = PorosBackendOption(); - poros_option.use_gpu = (option.device == Device::GPU) ? 
true : false;
-  poros_option.gpu_id = option.device_id;
-  poros_option.long_to_int = option.long_to_int;
-  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
-  poros_option.unconst_ops_thres = option.unconst_ops_thres;
-  poros_option.poros_file = option.poros_file;
-  poros_option.is_dynamic = option.is_dynamic;
-  poros_option.enable_fp16 = option.trt_enable_fp16;
-  poros_option.max_batch_size = option.trt_max_batch_size;
-  poros_option.max_workspace_size = option.trt_max_workspace_size;
   FDASSERT(
       option.model_format == ModelFormat::TORCHSCRIPT,
       "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
+  if (option.device != Device::CPU && option.device != Device::GPU) {
+    FDERROR << "PorosBackend only supports CPU/GPU, but got "
+            << option.device << "." << std::endl;
+    return false;
+  }
+  option.poros_option.device = option.device;
+  option.poros_option.device_id = option.device_id;
+  option.poros_option.enable_fp16 = option.trt_enable_fp16;
+  option.poros_option.max_batch_size = option.trt_max_batch_size;
+  option.poros_option.max_workspace_size = option.trt_max_workspace_size;
   backend_ = utils::make_unique<PorosBackend>();
   auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
   FDASSERT(
-      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
+      casted_backend->Compile(option.model_file, prewarm_tensors,
+                              option.poros_option),
       "Load model from Torchscript failed while initliazing PorosBackend.");
 #else
   FDASSERT(false,
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index 64222f359..44b81e50a 100644
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -413,12 +413,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
   // ======Only for PaddleTrt Backend=======
   std::vector<std::string> trt_disabled_ops_{};
 
-  // ======Only for Poros Backend=======
-  bool is_dynamic = false;
-  bool long_to_int = true;
-  bool use_nvidia_tf32 = false;
-  int unconst_ops_thres = -1;
-  std::string poros_file = "";
+  PorosBackendOption poros_option;
 
   OpenVINOBackendOption openvino_option;
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
old mode 100755
new mode 100644
index b3da670bb..6c8c53cb8
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -147,7 +147,7 @@ class Runtime:
     def get_profile_time(self):
         """Get profile time of Runtime after the profile process is done.
         """
-        return self._runtime.get_profile_time()
+        return self._runtime.get_profile_time()
 
 
 class RuntimeOption:
@@ -160,65 +160,6 @@ class RuntimeOption:
         self._option = C.RuntimeOption()
 
-    @property
-    def is_dynamic(self):
-        """Only for Poros backend
-
-        :param value: (bool)Whether to enable dynamic shape, default False
-        """
-        return self._option.is_dynamic
-
-    @property
-    def unconst_ops_thres(self):
-        """Only for Poros backend
-
-        :param value: (int)Minimum number of subgraph OPs, default 10
-        """
-        return self._option.unconst_ops_thres
-
-    @property
-    def long_to_int(self):
-        """Only for Poros backend
-
-        :param value: (bool)Whether to convert long dtype to int dtype, default True
-        """
-        return self._option.long_to_int
-
-    @property
-    def use_nvidia_tf32(self):
-        """Only for Poros backend
-
-        :param value: (bool)The calculation accuracy of tf32 mode exists on the A card, which can bring some performance improvements, default False
-        """
-        return self._option.use_nvidia_tf32
-
-    @is_dynamic.setter
-    def is_dynamic(self, value):
-        assert isinstance(
-            value, bool), "The value to set `is_dynamic` must be type of bool."
- self._option.is_dynamic = value - - @unconst_ops_thres.setter - def unconst_ops_thres(self, value): - assert isinstance( - value, - int), "The value to set `unconst_ops_thres` must be type of int." - self._option.unconst_ops_thres = value - - @long_to_int.setter - def long_to_int(self, value): - assert isinstance( - value, - bool), "The value to set `long_to_int` must be type of bool." - self._option.long_to_int = value - - @use_nvidia_tf32.setter - def use_nvidia_tf32(self, value): - assert isinstance( - value, - bool), "The value to set `use_nvidia_tf32` must be type of bool." - self._option.use_nvidia_tf32 = value - def set_model_path(self, model_path, params_path="", @@ -322,6 +263,9 @@ class RuntimeOption: :param level: (int)Optimization level, -1 means the default setting """ + logging.warning( + "`RuntimeOption.set_ort_graph_opt_level` will be deprecated in v1.2.0, please use `RuntimeOption.graph_optimize_level = 99` instead." + ) return self._option.set_ort_graph_opt_level(level) def use_paddle_backend(self): @@ -364,29 +308,36 @@ class RuntimeOption: """ return self.use_lite_backend() - def set_lite_device_names(self, device_names): - """Set nnadapter device name for Paddle Lite backend. - """ - return self._option.set_lite_device_names(device_names) - def set_lite_context_properties(self, context_properties): """Set nnadapter context properties for Paddle Lite backend. """ + logging.warning( + "`RuntimeOption.set_lite_context_properties` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_context_properties = ...` instead." + ) return self._option.set_lite_context_properties(context_properties) def set_lite_model_cache_dir(self, model_cache_dir): """Set nnadapter model cache dir for Paddle Lite backend. """ + logging.warning( + "`RuntimeOption.set_lite_model_cache_dir` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_model_cache_dir = ...` instead." + ) return self._option.set_lite_model_cache_dir(model_cache_dir) def set_lite_dynamic_shape_info(self, dynamic_shape_info): """ Set nnadapter dynamic shape info for Paddle Lite backend. """ + logging.warning( + "`RuntimeOption.set_lite_dynamic_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_dynamic_shape_info = ...` instead." + ) return self._option.set_lite_dynamic_shape_info(dynamic_shape_info) def set_lite_subgraph_partition_path(self, subgraph_partition_path): """ Set nnadapter subgraph partition path for Paddle Lite backend. """ + logging.warning( + "`RuntimeOption.set_lite_subgraph_partition_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_path = ...` instead." + ) return self._option.set_lite_subgraph_partition_path( subgraph_partition_path) @@ -394,6 +345,9 @@ class RuntimeOption: subgraph_partition_buffer): """ Set nnadapter subgraph partition buffer for Paddle Lite backend. """ + logging.warning( + "`RuntimeOption.set_lite_subgraph_partition_buffer` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = ...` instead." + ) return self._option.set_lite_subgraph_partition_config_buffer( subgraph_partition_buffer) @@ -401,6 +355,9 @@ class RuntimeOption: self, mixed_precision_quantization_config_path): """ Set nnadapter mixed precision quantization config path for Paddle Lite backend.. 
""" + logging.warning( + "`RuntimeOption.set_lite_mixed_precision_quantization_config_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = ...` instead." + ) return self._option.set_lite_mixed_precision_quantization_config_path( mixed_precision_quantization_config_path) @@ -411,21 +368,33 @@ class RuntimeOption: def set_openvino_device(self, name="CPU"): """Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'.... + This interface is deprecated, please use `RuntimeOption.openvino_option.set_device` instead. """ + logging.warning( + "`RuntimeOption.set_openvino_device` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_device` instead." + ) return self._option.set_openvino_device(name) def set_openvino_shape_info(self, shape_info): """Set shape information of the models' inputs, used for GPU to fix the shape + This interface is deprecated, please use `RuntimeOption.openvino_option.set_shape_info` instead. :param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]} """ + logging.warning( + "`RuntimeOption.set_openvino_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_shape_info` instead." + ) return self._option.set_openvino_shape_info(shape_info) def set_openvino_cpu_operators(self, operators): """While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU + This interface is deprecated, please use `RuntimeOption.openvino_option.set_cpu_operators` instead. :param operators: (list of string)list of operators' name, e.g ["MulticlasNms"] """ + logging.warning( + "`RuntimeOption.set_openvino_cpu_operators` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_cpu_operators` instead." + ) return self._option.set_openvino_cpu_operators(operators) def enable_paddle_log_info(self): @@ -557,15 +526,45 @@ class RuntimeOption: available_memory_proportion, enable_half_partial) - def enable_profiling(self, - inclue_h2d_d2h=False, - repeat=100, warmup=50): + @property + def poros_option(self): + """Get PorosBackendOption object to configure Poros backend + + :return PorosBackendOption + """ + return self._option.poros_option + + @property + def paddle_lite_option(self): + """Get LiteBackendOption object to configure Paddle Lite backend + + :return LiteBackendOption + """ + return self._option.paddle_lite_option + + @property + def openvino_option(self): + """Get OpenVINOOption object to configure OpenVINO backend + + :return OpenVINOOption + """ + return self._option.openvino_option + + @property + def ort_option(self): + """Get OrtBackendOption object to configure ONNX Runtime backend + + :return OrtBackendOption + """ + return self._option.ort_option + + def enable_profiling(self, inclue_h2d_d2h=False, repeat=100, warmup=50): """Set the profile mode as 'true'. :param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime. :param repeat Repeat times for runtime inference. :param warmup Warmup times for runtime inference. - """ - return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup) + """ + return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup) def disable_profiling(self): """Set the profile mode as 'false'. 
@@ -580,8 +579,7 @@ class RuntimeOption: continue if hasattr(getattr(self._option, attr), "__call__"): continue - message += " {} : {}\t\n".format(attr, - getattr(self._option, attr)) + message += " {} : {}\t\n".format(attr, getattr(self._option, attr)) message.strip("\n") message += ")" return message
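
Usage note (editor's addition, not part of the diff): the sketch below illustrates how Poros settings migrate from the flat RuntimeOption fields removed by this patch to the nested backend-option objects bound above. It is a minimal example modeled on the bundled infer_torchscript_poros.py; the model path and prewarm tensor shape are placeholders.

import numpy as np
import fastdeploy as fd
from fastdeploy import ModelFormat

option = fd.RuntimeOption()
option.use_gpu(0)
option.use_poros_backend()
option.set_model_path(
    "std_resnet50_script.pt", model_format=ModelFormat.TORCHSCRIPT)

# Poros settings now live on the nested PorosBackendOption object; the old
# flat fields such as `option.is_dynamic` and `option.long_to_int` are removed.
option.poros_option.is_dynamic = False
option.poros_option.long_to_int = True

# Other backends follow the same pattern, e.g.:
# option.ort_option.intra_op_num_threads = 4
# option.paddle_lite_option.cpu_threads = 4

# Placeholder prewarm batch; shape assumed for a ResNet-50 TorchScript model.
prewarm_datas = [[np.random.rand(1, 3, 224, 224).astype("float32")]]

runtime = fd.Runtime(option)
runtime.compile(prewarm_datas)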