[Other] Optimize Poros backend (#1232)

* Optimize Poros backend

* fix error

* Add more pybind

* fix conflicts

* add some deprecate notices
This commit is contained in:
Jason
2023-02-07 10:18:03 +08:00
committed by GitHub
parent f73a538f61
commit 7c9bf11c44
15 changed files with 397 additions and 283 deletions

View File

@@ -13,6 +13,10 @@
# limitations under the License. # limitations under the License.
include(ExternalProject) include(ExternalProject)
if(NOT ENABLE_TRT_BACKEND)
message(FATAL_ERROR "While ENABLE_POROS_BACKEND, requires ENABLE_TRT_BACKEND=ON, but now its OFF.")
endif()
set(POROS_PROJECT "extern_poros") set(POROS_PROJECT "extern_poros")
set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros) set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
set(POROS_SOURCE_DIR set(POROS_SOURCE_DIR
@@ -48,9 +52,10 @@ else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.") message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
else() else()
message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
if(WITH_GPU) if(WITH_GPU)
set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz") set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz")
else()
message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
endif() endif()
endif() endif()
endif() endif()
@@ -77,7 +82,7 @@ add_dependencies(external_poros ${POROS_PROJECT})
# Download libtorch.so with ABI=1 # Download libtorch.so with ABI=1
set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip") set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip")
set(TROCH_URL "${TORCH_URL_BASE}${TORCH_FILE}") set(TORCH_URL "${TORCH_URL_BASE}${TORCH_FILE}")
message(STATUS "Use the default Torch lib from: ${TORCH_URL}") message(STATUS "Use the default Torch lib from: ${TORCH_URL}")
download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install) download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install)
if(EXISTS ${THIRD_PARTY_PATH}/install/torch) if(EXISTS ${THIRD_PARTY_PATH}/install/torch)

View File

@@ -51,7 +51,6 @@ if __name__ == '__main__':
option.use_poros_backend() option.use_poros_backend()
option.set_model_path( option.set_model_path(
"std_resnet50_script.pt", model_format=ModelFormat.TORCHSCRIPT) "std_resnet50_script.pt", model_format=ModelFormat.TORCHSCRIPT)
option.is_dynamic = True
# compile # compile
runtime = fd.Runtime(option) runtime = fd.Runtime(option)
runtime.compile(prewarm_datas) runtime.compile(prewarm_datas)

View File

@@ -16,106 +16,10 @@
namespace fastdeploy { namespace fastdeploy {
void BindOption(pybind11::module& m);
void BindRuntime(pybind11::module& m) { void BindRuntime(pybind11::module& m) {
pybind11::class_<RuntimeOption>(m, "RuntimeOption") BindOption(m);
.def(pybind11::init())
.def("set_model_path", &RuntimeOption::SetModelPath)
.def("set_model_buffer", &RuntimeOption::SetModelBuffer)
.def("use_gpu", &RuntimeOption::UseGpu)
.def("use_cpu", &RuntimeOption::UseCpu)
.def("use_rknpu2", &RuntimeOption::UseRKNPU2)
.def("use_sophgo", &RuntimeOption::UseSophgo)
.def("use_ascend", &RuntimeOption::UseAscend)
.def("use_kunlunxin", &RuntimeOption::UseKunlunXin)
.def("set_external_stream", &RuntimeOption::SetExternalStream)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_poros_backend", &RuntimeOption::UsePorosBackend)
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
.def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
.def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
.def("use_lite_backend", &RuntimeOption::UseLiteBackend)
.def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames)
.def("set_lite_context_properties",
&RuntimeOption::SetLiteContextProperties)
.def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir)
.def("set_lite_dynamic_shape_info",
&RuntimeOption::SetLiteDynamicShapeInfo)
.def("set_lite_subgraph_partition_path",
&RuntimeOption::SetLiteSubgraphPartitionPath)
.def("set_lite_mixed_precision_quantization_config_path",
&RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
.def("set_lite_subgraph_partition_config_buffer",
&RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
.def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
.def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
.def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
.def("set_openvino_cpu_operators",
&RuntimeOption::SetOpenVINOCpuOperators)
.def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo)
.def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo)
.def("set_paddle_mkldnn_cache_size",
&RuntimeOption::SetPaddleMKLDNNCacheSize)
.def("enable_lite_fp16", &RuntimeOption::EnableLiteFP16)
.def("disable_lite_fp16", &RuntimeOption::DisableLiteFP16)
.def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode)
.def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
.def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize)
.def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize)
.def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt)
.def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
.def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
.def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
.def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory)
.def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory)
.def("enable_paddle_trt_collect_shape",
&RuntimeOption::EnablePaddleTrtCollectShape)
.def("disable_paddle_trt_collect_shape",
&RuntimeOption::DisablePaddleTrtCollectShape)
.def("use_ipu", &RuntimeOption::UseIpu)
.def("set_ipu_config", &RuntimeOption::SetIpuConfig)
.def("delete_paddle_backend_pass",
&RuntimeOption::DeletePaddleBackendPass)
.def("enable_profiling", &RuntimeOption::EnableProfiling)
.def("disable_profiling", &RuntimeOption::DisableProfiling)
.def("disable_paddle_trt_ops", &RuntimeOption::DisablePaddleTrtOPs)
.def_readwrite("model_file", &RuntimeOption::model_file)
.def_readwrite("params_file", &RuntimeOption::params_file)
.def_readwrite("model_format", &RuntimeOption::model_format)
.def_readwrite("backend", &RuntimeOption::backend)
.def_readwrite("external_stream", &RuntimeOption::external_stream_)
.def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
.def_readwrite("device_id", &RuntimeOption::device_id)
.def_readwrite("device", &RuntimeOption::device)
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
.def_readwrite("trt_max_workspace_size",
&RuntimeOption::trt_max_workspace_size)
.def_readwrite("is_dynamic", &RuntimeOption::is_dynamic)
.def_readwrite("long_to_int", &RuntimeOption::long_to_int)
.def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32)
.def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres)
.def_readwrite("poros_file", &RuntimeOption::poros_file)
.def_readwrite("ipu_device_num", &RuntimeOption::ipu_device_num)
.def_readwrite("ipu_micro_batch_size",
&RuntimeOption::ipu_micro_batch_size)
.def_readwrite("ipu_enable_pipelining",
&RuntimeOption::ipu_enable_pipelining)
.def_readwrite("ipu_batches_per_step",
&RuntimeOption::ipu_batches_per_step)
.def_readwrite("ipu_enable_fp16", &RuntimeOption::ipu_enable_fp16)
.def_readwrite("ipu_replica_num", &RuntimeOption::ipu_replica_num)
.def_readwrite("ipu_available_memory_proportion",
&RuntimeOption::ipu_available_memory_proportion)
.def_readwrite("ipu_enable_half_partial",
&RuntimeOption::ipu_enable_half_partial);
pybind11::class_<TensorInfo>(m, "TensorInfo") pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name) .def_readwrite("name", &TensorInfo::name)

View File

@@ -21,9 +21,7 @@
// FastDeploy static library, default OFF. These messages // FastDeploy static library, default OFF. These messages
// are only reserved for debugging. // are only reserved for debugging.
#if defined(WITH_STATIC_WARNING) #if defined(WITH_STATIC_WARNING)
#warning You are using the FastDeploy static library. \ #warning You are using the FastDeploy static library. We will automatically add some registration codes for ops, kernels and passes for Paddle Lite. // NOLINT
We will automatically add some registration codes for \
ops, kernels and passes for Paddle Lite.
#endif #endif
#if !defined(WITH_STATIC_LIB_AT_COMPILING) #if !defined(WITH_STATIC_LIB_AT_COMPILING)
#include "paddle_use_ops.h" // NOLINT #include "paddle_use_ops.h" // NOLINT
@@ -52,7 +50,7 @@ enum LitePowerMode {
struct LiteBackendOption { struct LiteBackendOption {
/// Paddle Lite power mode for mobile device. /// Paddle Lite power mode for mobile device.
LitePowerMode power_mode = LITE_POWER_NO_BIND; int power_mode = 3;
/// Number of threads while use CPU /// Number of threads while use CPU
int cpu_threads = 1; int cpu_threads = 1;
/// Enable use half precision /// Enable use half precision

View File

@@ -0,0 +1,63 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
#include "fastdeploy/runtime/backends/lite/option.h"
namespace fastdeploy {
// Registers LiteBackendOption with module `m` as the Python class
// "LiteBackendOption": a default constructor plus one read/write attribute
// for each option field listed below.
void BindLiteOption(pybind11::module& m) {
  pybind11::class_<LiteBackendOption> lite_cls(m, "LiteBackendOption");
  lite_cls.def(pybind11::init());

  // General fields.
  lite_cls.def_readwrite("power_mode", &LiteBackendOption::power_mode);
  lite_cls.def_readwrite("cpu_threads", &LiteBackendOption::cpu_threads);
  lite_cls.def_readwrite("enable_fp16", &LiteBackendOption::enable_fp16);
  lite_cls.def_readwrite("enable_int8", &LiteBackendOption::enable_int8);
  lite_cls.def_readwrite("device", &LiteBackendOption::device);
  lite_cls.def_readwrite("optimized_model_dir",
                         &LiteBackendOption::optimized_model_dir);

  // nnadapter_* fields.
  lite_cls.def_readwrite(
      "nnadapter_subgraph_partition_config_path",
      &LiteBackendOption::nnadapter_subgraph_partition_config_path);
  lite_cls.def_readwrite(
      "nnadapter_subgraph_partition_config_buffer",
      &LiteBackendOption::nnadapter_subgraph_partition_config_buffer);
  lite_cls.def_readwrite("nnadapter_context_properties",
                         &LiteBackendOption::nnadapter_context_properties);
  lite_cls.def_readwrite("nnadapter_model_cache_dir",
                         &LiteBackendOption::nnadapter_model_cache_dir);
  lite_cls.def_readwrite(
      "nnadapter_mixed_precision_quantization_config_path",
      &LiteBackendOption::nnadapter_mixed_precision_quantization_config_path);
  lite_cls.def_readwrite("nnadapter_dynamic_shape_info",
                         &LiteBackendOption::nnadapter_dynamic_shape_info);
  lite_cls.def_readwrite("nnadapter_device_names",
                         &LiteBackendOption::nnadapter_device_names);

  // device_id and kunlunxin_* fields.
  lite_cls.def_readwrite("device_id", &LiteBackendOption::device_id);
  lite_cls.def_readwrite("kunlunxin_l3_workspace_size",
                         &LiteBackendOption::kunlunxin_l3_workspace_size);
  lite_cls.def_readwrite("kunlunxin_locked",
                         &LiteBackendOption::kunlunxin_locked);
  lite_cls.def_readwrite("kunlunxin_autotune",
                         &LiteBackendOption::kunlunxin_autotune);
  lite_cls.def_readwrite("kunlunxin_autotune_file",
                         &LiteBackendOption::kunlunxin_autotune_file);
  lite_cls.def_readwrite("kunlunxin_precision",
                         &LiteBackendOption::kunlunxin_precision);
  lite_cls.def_readwrite("kunlunxin_adaptive_seqlen",
                         &LiteBackendOption::kunlunxin_adaptive_seqlen);
  lite_cls.def_readwrite("kunlunxin_enable_multi_stream",
                         &LiteBackendOption::kunlunxin_enable_multi_stream);
}
} // namespace fastdeploy

View File

@@ -0,0 +1,30 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
#include "fastdeploy/runtime/backends/openvino/option.h"
namespace fastdeploy {
// Registers OpenVINOBackendOption with module `m` as the Python class
// "OpenVINOBackendOption": a default constructor, two read/write attributes
// and three setter methods.
void BindOpenVINOOption(pybind11::module& m) {
  pybind11::class_<OpenVINOBackendOption> ov_cls(m, "OpenVINOBackendOption");
  ov_cls.def(pybind11::init());

  // Data members exposed as attributes.
  ov_cls.def_readwrite("cpu_thread_num",
                       &OpenVINOBackendOption::cpu_thread_num);
  ov_cls.def_readwrite("num_streams", &OpenVINOBackendOption::num_streams);

  // Member functions exposed as methods.
  ov_cls.def("set_device", &OpenVINOBackendOption::SetDevice);
  ov_cls.def("set_shape_info", &OpenVINOBackendOption::SetShapeInfo);
  ov_cls.def("set_cpu_operators", &OpenVINOBackendOption::SetCpuOperators);
}
} // namespace fastdeploy

View File

@@ -0,0 +1,34 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
#include "fastdeploy/runtime/backends/ort/option.h"
namespace fastdeploy {
// Registers OrtBackendOption with module `m` as the Python class
// "OrtBackendOption": a default constructor plus one read/write attribute
// for each option field listed below.
void BindOrtOption(pybind11::module& m) {
  pybind11::class_<OrtBackendOption> ort_cls(m, "OrtBackendOption");
  ort_cls.def(pybind11::init());

  ort_cls.def_readwrite("graph_optimization_level",
                        &OrtBackendOption::graph_optimization_level);
  ort_cls.def_readwrite("intra_op_num_threads",
                        &OrtBackendOption::intra_op_num_threads);
  ort_cls.def_readwrite("inter_op_num_threads",
                        &OrtBackendOption::inter_op_num_threads);
  ort_cls.def_readwrite("execution_mode", &OrtBackendOption::execution_mode);
  ort_cls.def_readwrite("device", &OrtBackendOption::device);
  ort_cls.def_readwrite("device_id", &OrtBackendOption::device_id);
}
} // namespace fastdeploy

View File

@@ -23,12 +23,8 @@
namespace fastdeploy { namespace fastdeploy {
struct PorosBackendOption { struct PorosBackendOption {
#ifdef WITH_GPU Device device = Device::CPU;
bool use_gpu = true; int device_id = 0;
#else
bool use_gpu = false;
#endif
int gpu_id = 0;
bool long_to_int = true; bool long_to_int = true;
// There is calculation precision in tf32 mode on A10, it can bring some // There is calculation precision in tf32 mode on A10, it can bring some
// performance improvement, but there may be diff // performance improvement, but there may be diff

View File

@@ -0,0 +1,37 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
#include "fastdeploy/runtime/backends/poros/option.h"
namespace fastdeploy {
// Registers PorosBackendOption with module `m` as the Python class
// "PorosBackendOption": a default constructor plus one read/write attribute
// for each option field listed below.
void BindPorosOption(pybind11::module& m) {
  pybind11::class_<PorosBackendOption> poros_cls(m, "PorosBackendOption");
  poros_cls.def(pybind11::init());

  poros_cls.def_readwrite("long_to_int", &PorosBackendOption::long_to_int);
  poros_cls.def_readwrite("use_nvidia_tf32",
                          &PorosBackendOption::use_nvidia_tf32);
  poros_cls.def_readwrite("unconst_ops_thres",
                          &PorosBackendOption::unconst_ops_thres);
  poros_cls.def_readwrite("prewarm_datatypes",
                          &PorosBackendOption::prewarm_datatypes);
  poros_cls.def_readwrite("enable_fp16", &PorosBackendOption::enable_fp16);
  poros_cls.def_readwrite("enable_int8", &PorosBackendOption::enable_int8);
  poros_cls.def_readwrite("is_dynamic", &PorosBackendOption::is_dynamic);
  poros_cls.def_readwrite("max_batch_size",
                          &PorosBackendOption::max_batch_size);
  poros_cls.def_readwrite("max_workspace_size",
                          &PorosBackendOption::max_workspace_size);
}
} // namespace fastdeploy

View File

@@ -43,11 +43,12 @@ std::vector<TensorInfo> PorosBackend::GetOutputInfos() {
} }
void PorosBackend::BuildOption(const PorosBackendOption& option) { void PorosBackend::BuildOption(const PorosBackendOption& option) {
_options.device = option.use_gpu ? baidu::mirana::poros::Device::GPU _options.device = (option.device == Device::GPU)
: baidu::mirana::poros::Device::CPU; ? baidu::mirana::poros::Device::GPU
: baidu::mirana::poros::Device::CPU;
_options.long_to_int = option.long_to_int; _options.long_to_int = option.long_to_int;
_options.use_nvidia_tf32 = option.use_nvidia_tf32; _options.use_nvidia_tf32 = option.use_nvidia_tf32;
_options.device_id = option.gpu_id; _options.device_id = option.device_id;
_options.unconst_ops_thres = option.unconst_ops_thres; _options.unconst_ops_thres = option.unconst_ops_thres;
_options.is_dynamic = option.is_dynamic; _options.is_dynamic = option.is_dynamic;
_options.max_workspace_size = option.max_workspace_size; _options.max_workspace_size = option.max_workspace_size;
@@ -67,7 +68,7 @@ bool PorosBackend::Compile(const std::string& model_file,
torch::jit::Module mod; torch::jit::Module mod;
mod = torch::jit::load(model_file); mod = torch::jit::load(model_file);
mod.eval(); mod.eval();
if (option.use_gpu) { if (option.device == Device::GPU) {
mod.to(at::kCUDA); mod.to(at::kCUDA);
} else { } else {
mod.to(at::kCPU); mod.to(at::kCPU);
@@ -79,7 +80,7 @@ bool PorosBackend::Compile(const std::string& model_file,
_numinputs = inputs.size() - 1; _numinputs = inputs.size() - 1;
// FDTensor to at::Tensor // FDTensor to at::Tensor
std::vector<std::vector<c10::IValue>> prewarm_datas; std::vector<std::vector<c10::IValue>> prewarm_datas;
bool is_backend_cuda = option.use_gpu ? true : false; bool is_backend_cuda = (option.device == Device::GPU);
for (size_t i = 0; i < prewarm_tensors.size(); ++i) { for (size_t i = 0; i < prewarm_tensors.size(); ++i) {
std::vector<c10::IValue> prewarm_data; std::vector<c10::IValue> prewarm_data;
for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) { for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) {
@@ -121,73 +122,6 @@ bool PorosBackend::Compile(const std::string& model_file,
return true; return true;
} }
bool PorosBackend::InitFromTorchScript(const std::string& model_file,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
if (option.poros_file != "") {
std::ifstream fin(option.poros_file, std::ios::binary | std::ios::in);
if (fin) {
FDINFO << "Detect compiled Poros file in " << option.poros_file
<< ", will load it directly." << std::endl;
fin.close();
return InitFromPoros(option.poros_file, option);
}
}
BuildOption(option);
torch::jit::Module mod;
mod = torch::jit::load(model_file);
mod.eval();
if (option.use_gpu) {
mod.to(at::kCUDA);
} else {
mod.to(at::kCPU);
}
// get inputs_nums and outputs_nums
auto graph = mod.get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
auto outputs = graph->outputs();
_numoutputs = outputs.size();
_poros_module = baidu::mirana::poros::Compile(mod, _prewarm_datas, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
initialized_ = true;
return true;
}
bool PorosBackend::InitFromPoros(const std::string& model_file,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
_poros_module = baidu::mirana::poros::Load(model_file, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
// get inputs_nums and outputs_nums
auto graph = _poros_module->get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
auto outputs = graph->outputs();
_numoutputs = outputs.size();
initialized_ = true;
return true;
}
bool PorosBackend::Infer(std::vector<FDTensor>& inputs, bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs, bool copy_to_fd) { std::vector<FDTensor>* outputs, bool copy_to_fd) {
// Convert FD Tensor to PyTorch Tensor // Convert FD Tensor to PyTorch Tensor
@@ -238,4 +172,4 @@ bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
return true; return true;
} }
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -51,13 +51,6 @@ class PorosBackend : public BaseBackend {
void BuildOption(const PorosBackendOption& option); void BuildOption(const PorosBackendOption& option);
bool
InitFromTorchScript(const std::string& model_file,
const PorosBackendOption& option = PorosBackendOption());
bool InitFromPoros(const std::string& model_file,
const PorosBackendOption& option = PorosBackendOption());
bool Compile(const std::string& model_file, bool Compile(const std::string& model_file,
std::vector<std::vector<FDTensor>>& prewarm_tensors, std::vector<std::vector<FDTensor>>& prewarm_tensors,
const PorosBackendOption& option = PorosBackendOption()); const PorosBackendOption& option = PorosBackendOption());

View File

@@ -0,0 +1,129 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindLiteOption(pybind11::module& m);
void BindOpenVINOOption(pybind11::module& m);
void BindOrtOption(pybind11::module& m);
void BindPorosOption(pybind11::module& m);
// Binds every option type into module `m`.
//
// The four per-backend option classes (Lite, OpenVINO, ORT, Poros) are
// registered first, then RuntimeOption is registered as the Python class
// "RuntimeOption" with a default constructor, its configuration methods and
// its read/write attributes. Registration order matches the statement order
// below.
void BindOption(pybind11::module& m) {
  BindLiteOption(m);
  BindOpenVINOOption(m);
  BindOrtOption(m);
  BindPorosOption(m);

  pybind11::class_<RuntimeOption> opt(m, "RuntimeOption");
  opt.def(pybind11::init());

  // Model source and use_* device methods.
  opt.def("set_model_path", &RuntimeOption::SetModelPath)
      .def("set_model_buffer", &RuntimeOption::SetModelBuffer)
      .def("use_gpu", &RuntimeOption::UseGpu)
      .def("use_cpu", &RuntimeOption::UseCpu)
      .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
      .def("use_sophgo", &RuntimeOption::UseSophgo)
      .def("use_ascend", &RuntimeOption::UseAscend)
      .def("use_kunlunxin", &RuntimeOption::UseKunlunXin);

  // Nested per-backend option structs, exposed as attributes.
  opt.def_readwrite("paddle_lite_option", &RuntimeOption::paddle_lite_option)
      .def_readwrite("openvino_option", &RuntimeOption::openvino_option)
      .def_readwrite("ort_option", &RuntimeOption::ort_option)
      .def_readwrite("poros_option", &RuntimeOption::poros_option);

  // Stream / thread settings and use_*_backend methods.
  opt.def("set_external_stream", &RuntimeOption::SetExternalStream)
      .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
      .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
      .def("use_poros_backend", &RuntimeOption::UsePorosBackend)
      .def("use_ort_backend", &RuntimeOption::UseOrtBackend)
      .def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
      .def("use_trt_backend", &RuntimeOption::UseTrtBackend)
      .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
      .def("use_lite_backend", &RuntimeOption::UseLiteBackend);

  // set_lite_* methods.
  opt.def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames)
      .def("set_lite_context_properties",
           &RuntimeOption::SetLiteContextProperties)
      .def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir)
      .def("set_lite_dynamic_shape_info",
           &RuntimeOption::SetLiteDynamicShapeInfo)
      .def("set_lite_subgraph_partition_path",
           &RuntimeOption::SetLiteSubgraphPartitionPath)
      .def("set_lite_mixed_precision_quantization_config_path",
           &RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
      .def("set_lite_subgraph_partition_config_buffer",
           &RuntimeOption::SetLiteSubgraphPartitionConfigBuffer);

  // Paddle / OpenVINO / Lite methods.
  opt.def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
      .def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
      .def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
      .def("set_openvino_cpu_operators",
           &RuntimeOption::SetOpenVINOCpuOperators)
      .def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo)
      .def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo)
      .def("set_paddle_mkldnn_cache_size",
           &RuntimeOption::SetPaddleMKLDNNCacheSize)
      .def("enable_lite_fp16", &RuntimeOption::EnableLiteFP16)
      .def("disable_lite_fp16", &RuntimeOption::DisableLiteFP16)
      .def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode);

  // TensorRT, pinned-memory and Paddle-TRT methods.
  opt.def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
      .def("set_trt_max_workspace_size",
           &RuntimeOption::SetTrtMaxWorkspaceSize)
      .def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize)
      .def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt)
      .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
      .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
      .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
      .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory)
      .def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory)
      .def("enable_paddle_trt_collect_shape",
           &RuntimeOption::EnablePaddleTrtCollectShape)
      .def("disable_paddle_trt_collect_shape",
           &RuntimeOption::DisablePaddleTrtCollectShape);

  // IPU, pass deletion and profiling methods.
  opt.def("use_ipu", &RuntimeOption::UseIpu)
      .def("set_ipu_config", &RuntimeOption::SetIpuConfig)
      .def("delete_paddle_backend_pass",
           &RuntimeOption::DeletePaddleBackendPass)
      .def("enable_profiling", &RuntimeOption::EnableProfiling)
      .def("disable_profiling", &RuntimeOption::DisableProfiling)
      .def("disable_paddle_trt_ops", &RuntimeOption::DisablePaddleTrtOPs);

  // General read/write attributes. Note that "external_stream" and
  // "model_from_memory" map to members with a trailing underscore.
  opt.def_readwrite("model_file", &RuntimeOption::model_file)
      .def_readwrite("params_file", &RuntimeOption::params_file)
      .def_readwrite("model_format", &RuntimeOption::model_format)
      .def_readwrite("backend", &RuntimeOption::backend)
      .def_readwrite("external_stream", &RuntimeOption::external_stream_)
      .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
      .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
      .def_readwrite("device_id", &RuntimeOption::device_id)
      .def_readwrite("device", &RuntimeOption::device);

  // trt_* attributes.
  opt.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
      .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
      .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
      .def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
      .def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
      .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
      .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
      .def_readwrite("trt_max_workspace_size",
                     &RuntimeOption::trt_max_workspace_size);

  // ipu_* attributes.
  opt.def_readwrite("ipu_device_num", &RuntimeOption::ipu_device_num)
      .def_readwrite("ipu_micro_batch_size",
                     &RuntimeOption::ipu_micro_batch_size)
      .def_readwrite("ipu_enable_pipelining",
                     &RuntimeOption::ipu_enable_pipelining)
      .def_readwrite("ipu_batches_per_step",
                     &RuntimeOption::ipu_batches_per_step)
      .def_readwrite("ipu_enable_fp16", &RuntimeOption::ipu_enable_fp16)
      .def_readwrite("ipu_replica_num", &RuntimeOption::ipu_replica_num)
      .def_readwrite("ipu_available_memory_proportion",
                     &RuntimeOption::ipu_available_memory_proportion)
      .def_readwrite("ipu_enable_half_partial",
                     &RuntimeOption::ipu_enable_half_partial);
}
} // namespace fastdeploy

View File

@@ -495,25 +495,24 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors, bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option) { const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND #ifdef ENABLE_POROS_BACKEND
option = _option;
auto poros_option = PorosBackendOption();
poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
poros_option.gpu_id = option.device_id;
poros_option.long_to_int = option.long_to_int;
poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
poros_option.unconst_ops_thres = option.unconst_ops_thres;
poros_option.poros_file = option.poros_file;
poros_option.is_dynamic = option.is_dynamic;
poros_option.enable_fp16 = option.trt_enable_fp16;
poros_option.max_batch_size = option.trt_max_batch_size;
poros_option.max_workspace_size = option.trt_max_workspace_size;
FDASSERT( FDASSERT(
option.model_format == ModelFormat::TORCHSCRIPT, option.model_format == ModelFormat::TORCHSCRIPT,
"PorosBackend only support model format of ModelFormat::TORCHSCRIPT."); "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
if (option.device != Device::CPU && option.device != Device::GPU) {
FDERROR << "PorosBackend only supports CPU/GPU, but now its "
<< option.device << "." << std::endl;
return false;
}
option.poros_option.device = option.device;
option.poros_option.device_id = option.device_id;
option.poros_option.enable_fp16 = option.trt_enable_fp16;
option.poros_option.max_batch_size = option.trt_max_batch_size;
option.poros_option.max_workspace_size = option.trt_max_workspace_size;
backend_ = utils::make_unique<PorosBackend>(); backend_ = utils::make_unique<PorosBackend>();
auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get()); auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
FDASSERT( FDASSERT(
casted_backend->Compile(option.model_file, prewarm_tensors, poros_option), casted_backend->Compile(option.model_file, prewarm_tensors,
option.poros_option),
"Load model from Torchscript failed while initliazing PorosBackend."); "Load model from Torchscript failed while initliazing PorosBackend.");
#else #else
FDASSERT(false, FDASSERT(false,

View File

@@ -413,12 +413,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
// ======Only for PaddleTrt Backend======= // ======Only for PaddleTrt Backend=======
std::vector<std::string> trt_disabled_ops_{}; std::vector<std::string> trt_disabled_ops_{};
// ======Only for Poros Backend======= PorosBackendOption poros_option;
bool is_dynamic = false;
bool long_to_int = true;
bool use_nvidia_tf32 = false;
int unconst_ops_thres = -1;
std::string poros_file = "";
OpenVINOBackendOption openvino_option; OpenVINOBackendOption openvino_option;

142
python/fastdeploy/runtime.py Executable file → Normal file
View File

@@ -147,7 +147,7 @@ class Runtime:
def get_profile_time(self): def get_profile_time(self):
"""Get profile time of Runtime after the profile process is done. """Get profile time of Runtime after the profile process is done.
""" """
return self._runtime.get_profile_time() return self._runtime.get_profile_time()
class RuntimeOption: class RuntimeOption:
@@ -160,65 +160,6 @@ class RuntimeOption:
self._option = C.RuntimeOption() self._option = C.RuntimeOption()
@property
def is_dynamic(self):
"""Only for Poros backend
:param value: (bool)Whether to enable dynamic shape, default False
"""
return self._option.is_dynamic
@property
def unconst_ops_thres(self):
"""Only for Poros backend
:param value: (int)Minimum number of subgraph OPs, default 10
"""
return self._option.unconst_ops_thres
@property
def long_to_int(self):
"""Only for Poros backend
:param value: (bool)Whether to convert long dtype to int dtype, default True
"""
return self._option.long_to_int
@property
def use_nvidia_tf32(self):
"""Only for Poros backend
:param value: (bool)The calculation accuracy of tf32 mode exists on the A card, which can bring some performance improvements, default False
"""
return self._option.use_nvidia_tf32
@is_dynamic.setter
def is_dynamic(self, value):
assert isinstance(
value, bool), "The value to set `is_dynamic` must be type of bool."
self._option.is_dynamic = value
@unconst_ops_thres.setter
def unconst_ops_thres(self, value):
assert isinstance(
value,
int), "The value to set `unconst_ops_thres` must be type of int."
self._option.unconst_ops_thres = value
@long_to_int.setter
def long_to_int(self, value):
assert isinstance(
value,
bool), "The value to set `long_to_int` must be type of bool."
self._option.long_to_int = value
@use_nvidia_tf32.setter
def use_nvidia_tf32(self, value):
assert isinstance(
value,
bool), "The value to set `use_nvidia_tf32` must be type of bool."
self._option.use_nvidia_tf32 = value
def set_model_path(self, def set_model_path(self,
model_path, model_path,
params_path="", params_path="",
@@ -322,6 +263,9 @@ class RuntimeOption:
:param level: (int)Optimization level, -1 means the default setting :param level: (int)Optimization level, -1 means the default setting
""" """
logging.warning(
"`RuntimeOption.set_ort_graph_opt_level` will be deprecated in v1.2.0, please use `RuntimeOption.graph_optimize_level = 99` instead."
)
return self._option.set_ort_graph_opt_level(level) return self._option.set_ort_graph_opt_level(level)
def use_paddle_backend(self): def use_paddle_backend(self):
@@ -364,29 +308,36 @@ class RuntimeOption:
""" """
return self.use_lite_backend() return self.use_lite_backend()
def set_lite_device_names(self, device_names):
"""Set nnadapter device name for Paddle Lite backend.
"""
return self._option.set_lite_device_names(device_names)
def set_lite_context_properties(self, context_properties): def set_lite_context_properties(self, context_properties):
"""Set nnadapter context properties for Paddle Lite backend. """Set nnadapter context properties for Paddle Lite backend.
""" """
logging.warning(
"`RuntimeOption.set_lite_context_properties` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_context_properties = ...` instead."
)
return self._option.set_lite_context_properties(context_properties) return self._option.set_lite_context_properties(context_properties)
def set_lite_model_cache_dir(self, model_cache_dir): def set_lite_model_cache_dir(self, model_cache_dir):
"""Set nnadapter model cache dir for Paddle Lite backend. """Set nnadapter model cache dir for Paddle Lite backend.
""" """
logging.warning(
"`RuntimeOption.set_lite_model_cache_dir` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_model_cache_dir = ...` instead."
)
return self._option.set_lite_model_cache_dir(model_cache_dir) return self._option.set_lite_model_cache_dir(model_cache_dir)
def set_lite_dynamic_shape_info(self, dynamic_shape_info): def set_lite_dynamic_shape_info(self, dynamic_shape_info):
""" Set nnadapter dynamic shape info for Paddle Lite backend. """ Set nnadapter dynamic shape info for Paddle Lite backend.
""" """
logging.warning(
"`RuntimeOption.set_lite_dynamic_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_dynamic_shape_info = ...` instead."
)
return self._option.set_lite_dynamic_shape_info(dynamic_shape_info) return self._option.set_lite_dynamic_shape_info(dynamic_shape_info)
def set_lite_subgraph_partition_path(self, subgraph_partition_path): def set_lite_subgraph_partition_path(self, subgraph_partition_path):
""" Set nnadapter subgraph partition path for Paddle Lite backend. """ Set nnadapter subgraph partition path for Paddle Lite backend.
""" """
logging.warning(
"`RuntimeOption.set_lite_subgraph_partition_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_path = ...` instead."
)
return self._option.set_lite_subgraph_partition_path( return self._option.set_lite_subgraph_partition_path(
subgraph_partition_path) subgraph_partition_path)
@@ -394,6 +345,9 @@ class RuntimeOption:
subgraph_partition_buffer): subgraph_partition_buffer):
""" Set nnadapter subgraph partition buffer for Paddle Lite backend. """ Set nnadapter subgraph partition buffer for Paddle Lite backend.
""" """
logging.warning(
"`RuntimeOption.set_lite_subgraph_partition_buffer` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = ...` instead."
)
return self._option.set_lite_subgraph_partition_config_buffer( return self._option.set_lite_subgraph_partition_config_buffer(
subgraph_partition_buffer) subgraph_partition_buffer)
@@ -401,6 +355,9 @@ class RuntimeOption:
self, mixed_precision_quantization_config_path): self, mixed_precision_quantization_config_path):
""" Set nnadapter mixed precision quantization config path for Paddle Lite backend.. """ Set nnadapter mixed precision quantization config path for Paddle Lite backend..
""" """
logging.warning(
"`RuntimeOption.set_lite_mixed_precision_quantization_config_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = ...` instead."
)
return self._option.set_lite_mixed_precision_quantization_config_path( return self._option.set_lite_mixed_precision_quantization_config_path(
mixed_precision_quantization_config_path) mixed_precision_quantization_config_path)
@@ -411,21 +368,33 @@ class RuntimeOption:
def set_openvino_device(self, name="CPU"): def set_openvino_device(self, name="CPU"):
"""Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'.... """Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
This interface is deprecated, please use `RuntimeOption.openvino_option.set_device` instead.
""" """
logging.warning(
"`RuntimeOption.set_openvino_device` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_device` instead."
)
return self._option.set_openvino_device(name) return self._option.set_openvino_device(name)
def set_openvino_shape_info(self, shape_info): def set_openvino_shape_info(self, shape_info):
"""Set shape information of the models' inputs, used for GPU to fix the shape """Set shape information of the models' inputs, used for GPU to fix the shape
This interface is deprecated, please use `RuntimeOption.openvino_option.set_shape_info` instead.
:param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]} :param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]}
""" """
logging.warning(
"`RuntimeOption.set_openvino_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_shape_info` instead."
)
return self._option.set_openvino_shape_info(shape_info) return self._option.set_openvino_shape_info(shape_info)
def set_openvino_cpu_operators(self, operators): def set_openvino_cpu_operators(self, operators):
"""While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU """While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU
This interface is deprecated, please use `RuntimeOption.openvino_option.set_cpu_operators` instead.
:param operators: (list of string)list of operators' name, e.g ["MulticlasNms"] :param operators: (list of string)list of operators' name, e.g ["MulticlasNms"]
""" """
logging.warning(
"`RuntimeOption.set_openvino_cpu_operators` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_cpu_operators` instead."
)
return self._option.set_openvino_cpu_operators(operators) return self._option.set_openvino_cpu_operators(operators)
def enable_paddle_log_info(self): def enable_paddle_log_info(self):
@@ -557,15 +526,45 @@ class RuntimeOption:
available_memory_proportion, available_memory_proportion,
enable_half_partial) enable_half_partial)
def enable_profiling(self, @property
inclue_h2d_d2h=False, def poros_option(self):
repeat=100, warmup=50): """Get PorosBackendOption object to configure Poros backend
:return PorosBackendOption
"""
return self._option.poros_option
@property
def paddle_lite_option(self):
"""Get LiteBackendOption object to configure Paddle Lite backend
:return LiteBackendOption
"""
return self._option.paddle_lite_option
@property
def openvino_option(self):
"""Get OpenVINOOption object to configure OpenVINO backend
:return OpenVINOOption
"""
return self._option.openvino_option
@property
def ort_option(self):
"""Get OrtBackendOption object to configure ONNX Runtime backend
:return OrtBackendOption
"""
return self._option.ort_option
def enable_profiling(self, inclue_h2d_d2h=False, repeat=100, warmup=50):
"""Set the profile mode as 'true'. """Set the profile mode as 'true'.
:param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime. :param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.
:param repeat Repeat times for runtime inference. :param repeat Repeat times for runtime inference.
:param warmup Warmup times for runtime inference. :param warmup Warmup times for runtime inference.
""" """
return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup) return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup)
def disable_profiling(self): def disable_profiling(self):
"""Set the profile mode as 'false'. """Set the profile mode as 'false'.
@@ -580,8 +579,7 @@ class RuntimeOption:
continue continue
if hasattr(getattr(self._option, attr), "__call__"): if hasattr(getattr(self._option, attr), "__call__"):
continue continue
message += " {} : {}\t\n".format(attr, message += " {} : {}\t\n".format(attr, getattr(self._option, attr))
getattr(self._option, attr))
message.strip("\n") message.strip("\n")
message += ")" message += ")"
return message return message