mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Other] Optimize Poros backend (#1232)
* Optimize Poros backend * fix error * Add more pybind * fix conflicts * add some deprecate notices
This commit is contained in:
@@ -13,6 +13,10 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
include(ExternalProject)
|
include(ExternalProject)
|
||||||
|
|
||||||
|
if(NOT ENABLE_TRT_BACKEND)
|
||||||
|
message(FATAL_ERROR "While ENABLE_POROS_BACKEND, requires ENABLE_TRT_BACKEND=ON, but now its OFF.")
|
||||||
|
endif()
|
||||||
|
|
||||||
set(POROS_PROJECT "extern_poros")
|
set(POROS_PROJECT "extern_poros")
|
||||||
set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
|
set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
|
||||||
set(POROS_SOURCE_DIR
|
set(POROS_SOURCE_DIR
|
||||||
@@ -48,9 +52,10 @@ else()
|
|||||||
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
|
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||||
message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
|
message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
|
|
||||||
if(WITH_GPU)
|
if(WITH_GPU)
|
||||||
set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz")
|
set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz")
|
||||||
|
else()
|
||||||
|
message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
@@ -77,7 +82,7 @@ add_dependencies(external_poros ${POROS_PROJECT})
|
|||||||
# Download libtorch.so with ABI=1
|
# Download libtorch.so with ABI=1
|
||||||
set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
|
set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
|
||||||
set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip")
|
set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip")
|
||||||
set(TROCH_URL "${TORCH_URL_BASE}${TORCH_FILE}")
|
set(TORCH_URL "${TORCH_URL_BASE}${TORCH_FILE}")
|
||||||
message(STATUS "Use the default Torch lib from: ${TORCH_URL}")
|
message(STATUS "Use the default Torch lib from: ${TORCH_URL}")
|
||||||
download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install)
|
download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install)
|
||||||
if(EXISTS ${THIRD_PARTY_PATH}/install/torch)
|
if(EXISTS ${THIRD_PARTY_PATH}/install/torch)
|
||||||
|
@@ -51,7 +51,6 @@ if __name__ == '__main__':
|
|||||||
option.use_poros_backend()
|
option.use_poros_backend()
|
||||||
option.set_model_path(
|
option.set_model_path(
|
||||||
"std_resnet50_script.pt", model_format=ModelFormat.TORCHSCRIPT)
|
"std_resnet50_script.pt", model_format=ModelFormat.TORCHSCRIPT)
|
||||||
option.is_dynamic = True
|
|
||||||
# compile
|
# compile
|
||||||
runtime = fd.Runtime(option)
|
runtime = fd.Runtime(option)
|
||||||
runtime.compile(prewarm_datas)
|
runtime.compile(prewarm_datas)
|
||||||
|
@@ -16,106 +16,10 @@
|
|||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
|
|
||||||
|
void BindOption(pybind11::module& m);
|
||||||
|
|
||||||
void BindRuntime(pybind11::module& m) {
|
void BindRuntime(pybind11::module& m) {
|
||||||
pybind11::class_<RuntimeOption>(m, "RuntimeOption")
|
BindOption(m);
|
||||||
.def(pybind11::init())
|
|
||||||
.def("set_model_path", &RuntimeOption::SetModelPath)
|
|
||||||
.def("set_model_buffer", &RuntimeOption::SetModelBuffer)
|
|
||||||
.def("use_gpu", &RuntimeOption::UseGpu)
|
|
||||||
.def("use_cpu", &RuntimeOption::UseCpu)
|
|
||||||
.def("use_rknpu2", &RuntimeOption::UseRKNPU2)
|
|
||||||
.def("use_sophgo", &RuntimeOption::UseSophgo)
|
|
||||||
.def("use_ascend", &RuntimeOption::UseAscend)
|
|
||||||
.def("use_kunlunxin", &RuntimeOption::UseKunlunXin)
|
|
||||||
.def("set_external_stream", &RuntimeOption::SetExternalStream)
|
|
||||||
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
|
|
||||||
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
|
|
||||||
.def("use_poros_backend", &RuntimeOption::UsePorosBackend)
|
|
||||||
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
|
|
||||||
.def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
|
|
||||||
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
|
|
||||||
.def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
|
|
||||||
.def("use_lite_backend", &RuntimeOption::UseLiteBackend)
|
|
||||||
.def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames)
|
|
||||||
.def("set_lite_context_properties",
|
|
||||||
&RuntimeOption::SetLiteContextProperties)
|
|
||||||
.def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir)
|
|
||||||
.def("set_lite_dynamic_shape_info",
|
|
||||||
&RuntimeOption::SetLiteDynamicShapeInfo)
|
|
||||||
.def("set_lite_subgraph_partition_path",
|
|
||||||
&RuntimeOption::SetLiteSubgraphPartitionPath)
|
|
||||||
.def("set_lite_mixed_precision_quantization_config_path",
|
|
||||||
&RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
|
|
||||||
.def("set_lite_subgraph_partition_config_buffer",
|
|
||||||
&RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
|
|
||||||
.def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
|
|
||||||
.def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
|
|
||||||
.def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
|
|
||||||
.def("set_openvino_cpu_operators",
|
|
||||||
&RuntimeOption::SetOpenVINOCpuOperators)
|
|
||||||
.def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo)
|
|
||||||
.def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo)
|
|
||||||
.def("set_paddle_mkldnn_cache_size",
|
|
||||||
&RuntimeOption::SetPaddleMKLDNNCacheSize)
|
|
||||||
.def("enable_lite_fp16", &RuntimeOption::EnableLiteFP16)
|
|
||||||
.def("disable_lite_fp16", &RuntimeOption::DisableLiteFP16)
|
|
||||||
.def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode)
|
|
||||||
.def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
|
|
||||||
.def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize)
|
|
||||||
.def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize)
|
|
||||||
.def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt)
|
|
||||||
.def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
|
|
||||||
.def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
|
|
||||||
.def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
|
|
||||||
.def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory)
|
|
||||||
.def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory)
|
|
||||||
.def("enable_paddle_trt_collect_shape",
|
|
||||||
&RuntimeOption::EnablePaddleTrtCollectShape)
|
|
||||||
.def("disable_paddle_trt_collect_shape",
|
|
||||||
&RuntimeOption::DisablePaddleTrtCollectShape)
|
|
||||||
.def("use_ipu", &RuntimeOption::UseIpu)
|
|
||||||
.def("set_ipu_config", &RuntimeOption::SetIpuConfig)
|
|
||||||
.def("delete_paddle_backend_pass",
|
|
||||||
&RuntimeOption::DeletePaddleBackendPass)
|
|
||||||
.def("enable_profiling", &RuntimeOption::EnableProfiling)
|
|
||||||
.def("disable_profiling", &RuntimeOption::DisableProfiling)
|
|
||||||
.def("disable_paddle_trt_ops", &RuntimeOption::DisablePaddleTrtOPs)
|
|
||||||
.def_readwrite("model_file", &RuntimeOption::model_file)
|
|
||||||
.def_readwrite("params_file", &RuntimeOption::params_file)
|
|
||||||
.def_readwrite("model_format", &RuntimeOption::model_format)
|
|
||||||
.def_readwrite("backend", &RuntimeOption::backend)
|
|
||||||
.def_readwrite("external_stream", &RuntimeOption::external_stream_)
|
|
||||||
.def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
|
|
||||||
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
|
|
||||||
.def_readwrite("device_id", &RuntimeOption::device_id)
|
|
||||||
.def_readwrite("device", &RuntimeOption::device)
|
|
||||||
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
|
|
||||||
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
|
|
||||||
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
|
|
||||||
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
|
|
||||||
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
|
|
||||||
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
|
|
||||||
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
|
|
||||||
.def_readwrite("trt_max_workspace_size",
|
|
||||||
&RuntimeOption::trt_max_workspace_size)
|
|
||||||
.def_readwrite("is_dynamic", &RuntimeOption::is_dynamic)
|
|
||||||
.def_readwrite("long_to_int", &RuntimeOption::long_to_int)
|
|
||||||
.def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32)
|
|
||||||
.def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres)
|
|
||||||
.def_readwrite("poros_file", &RuntimeOption::poros_file)
|
|
||||||
.def_readwrite("ipu_device_num", &RuntimeOption::ipu_device_num)
|
|
||||||
.def_readwrite("ipu_micro_batch_size",
|
|
||||||
&RuntimeOption::ipu_micro_batch_size)
|
|
||||||
.def_readwrite("ipu_enable_pipelining",
|
|
||||||
&RuntimeOption::ipu_enable_pipelining)
|
|
||||||
.def_readwrite("ipu_batches_per_step",
|
|
||||||
&RuntimeOption::ipu_batches_per_step)
|
|
||||||
.def_readwrite("ipu_enable_fp16", &RuntimeOption::ipu_enable_fp16)
|
|
||||||
.def_readwrite("ipu_replica_num", &RuntimeOption::ipu_replica_num)
|
|
||||||
.def_readwrite("ipu_available_memory_proportion",
|
|
||||||
&RuntimeOption::ipu_available_memory_proportion)
|
|
||||||
.def_readwrite("ipu_enable_half_partial",
|
|
||||||
&RuntimeOption::ipu_enable_half_partial);
|
|
||||||
|
|
||||||
pybind11::class_<TensorInfo>(m, "TensorInfo")
|
pybind11::class_<TensorInfo>(m, "TensorInfo")
|
||||||
.def_readwrite("name", &TensorInfo::name)
|
.def_readwrite("name", &TensorInfo::name)
|
||||||
|
@@ -21,9 +21,7 @@
|
|||||||
// FastDepoy static library, default OFF. These messages
|
// FastDepoy static library, default OFF. These messages
|
||||||
// are only reserve for debugging.
|
// are only reserve for debugging.
|
||||||
#if defined(WITH_STATIC_WARNING)
|
#if defined(WITH_STATIC_WARNING)
|
||||||
#warning You are using the FastDeploy static library. \
|
#warning You are using the FastDeploy static library. We will automatically add some registration codes for ops, kernels and passes for Paddle Lite. // NOLINT
|
||||||
We will automatically add some registration codes for \
|
|
||||||
ops, kernels and passes for Paddle Lite.
|
|
||||||
#endif
|
#endif
|
||||||
#if !defined(WITH_STATIC_LIB_AT_COMPILING)
|
#if !defined(WITH_STATIC_LIB_AT_COMPILING)
|
||||||
#include "paddle_use_ops.h" // NOLINT
|
#include "paddle_use_ops.h" // NOLINT
|
||||||
@@ -52,7 +50,7 @@ enum LitePowerMode {
|
|||||||
|
|
||||||
struct LiteBackendOption {
|
struct LiteBackendOption {
|
||||||
/// Paddle Lite power mode for mobile device.
|
/// Paddle Lite power mode for mobile device.
|
||||||
LitePowerMode power_mode = LITE_POWER_NO_BIND;
|
int power_mode = 3;
|
||||||
/// Number of threads while use CPU
|
/// Number of threads while use CPU
|
||||||
int cpu_threads = 1;
|
int cpu_threads = 1;
|
||||||
/// Enable use half precision
|
/// Enable use half precision
|
||||||
|
63
fastdeploy/runtime/backends/lite/option_pybind.cc
Normal file
63
fastdeploy/runtime/backends/lite/option_pybind.cc
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/pybind/main.h"
|
||||||
|
#include "fastdeploy/runtime/backends/lite/option.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
|
||||||
|
void BindLiteOption(pybind11::module& m) {
|
||||||
|
pybind11::class_<LiteBackendOption>(m, "LiteBackendOption")
|
||||||
|
.def(pybind11::init())
|
||||||
|
.def_readwrite("power_mode", &LiteBackendOption::power_mode)
|
||||||
|
.def_readwrite("cpu_threads", &LiteBackendOption::cpu_threads)
|
||||||
|
.def_readwrite("enable_fp16", &LiteBackendOption::enable_fp16)
|
||||||
|
.def_readwrite("enable_int8", &LiteBackendOption::enable_int8)
|
||||||
|
.def_readwrite("device", &LiteBackendOption::device)
|
||||||
|
.def_readwrite("optimized_model_dir",
|
||||||
|
&LiteBackendOption::optimized_model_dir)
|
||||||
|
.def_readwrite(
|
||||||
|
"nnadapter_subgraph_partition_config_path",
|
||||||
|
&LiteBackendOption::nnadapter_subgraph_partition_config_path)
|
||||||
|
.def_readwrite(
|
||||||
|
"nnadapter_subgraph_partition_config_buffer",
|
||||||
|
&LiteBackendOption::nnadapter_subgraph_partition_config_buffer)
|
||||||
|
.def_readwrite("nnadapter_context_properties",
|
||||||
|
&LiteBackendOption::nnadapter_context_properties)
|
||||||
|
.def_readwrite("nnadapter_model_cache_dir",
|
||||||
|
&LiteBackendOption::nnadapter_model_cache_dir)
|
||||||
|
.def_readwrite("nnadapter_mixed_precision_quantization_config_path",
|
||||||
|
&LiteBackendOption::
|
||||||
|
nnadapter_mixed_precision_quantization_config_path)
|
||||||
|
.def_readwrite("nnadapter_dynamic_shape_info",
|
||||||
|
&LiteBackendOption::nnadapter_dynamic_shape_info)
|
||||||
|
.def_readwrite("nnadapter_device_names",
|
||||||
|
&LiteBackendOption::nnadapter_device_names)
|
||||||
|
.def_readwrite("device_id", &LiteBackendOption::device_id)
|
||||||
|
.def_readwrite("kunlunxin_l3_workspace_size",
|
||||||
|
&LiteBackendOption::kunlunxin_l3_workspace_size)
|
||||||
|
.def_readwrite("kunlunxin_locked", &LiteBackendOption::kunlunxin_locked)
|
||||||
|
.def_readwrite("kunlunxin_autotune",
|
||||||
|
&LiteBackendOption::kunlunxin_autotune)
|
||||||
|
.def_readwrite("kunlunxin_autotune_file",
|
||||||
|
&LiteBackendOption::kunlunxin_autotune_file)
|
||||||
|
.def_readwrite("kunlunxin_precision",
|
||||||
|
&LiteBackendOption::kunlunxin_precision)
|
||||||
|
.def_readwrite("kunlunxin_adaptive_seqlen",
|
||||||
|
&LiteBackendOption::kunlunxin_adaptive_seqlen)
|
||||||
|
.def_readwrite("kunlunxin_enable_multi_stream",
|
||||||
|
&LiteBackendOption::kunlunxin_enable_multi_stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fastdeploy
|
30
fastdeploy/runtime/backends/openvino/option_pybind.cc
Normal file
30
fastdeploy/runtime/backends/openvino/option_pybind.cc
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/pybind/main.h"
|
||||||
|
#include "fastdeploy/runtime/backends/openvino/option.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
|
||||||
|
void BindOpenVINOOption(pybind11::module& m) {
|
||||||
|
pybind11::class_<OpenVINOBackendOption>(m, "OpenVINOBackendOption")
|
||||||
|
.def(pybind11::init())
|
||||||
|
.def_readwrite("cpu_thread_num", &OpenVINOBackendOption::cpu_thread_num)
|
||||||
|
.def_readwrite("num_streams", &OpenVINOBackendOption::num_streams)
|
||||||
|
.def("set_device", &OpenVINOBackendOption::SetDevice)
|
||||||
|
.def("set_shape_info", &OpenVINOBackendOption::SetShapeInfo)
|
||||||
|
.def("set_cpu_operators", &OpenVINOBackendOption::SetCpuOperators);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fastdeploy
|
34
fastdeploy/runtime/backends/ort/option_pybind.cc
Normal file
34
fastdeploy/runtime/backends/ort/option_pybind.cc
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/pybind/main.h"
|
||||||
|
#include "fastdeploy/runtime/backends/ort/option.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
|
||||||
|
void BindOrtOption(pybind11::module& m) {
|
||||||
|
pybind11::class_<OrtBackendOption>(m, "OrtBackendOption")
|
||||||
|
.def(pybind11::init())
|
||||||
|
.def_readwrite("graph_optimization_level",
|
||||||
|
&OrtBackendOption::graph_optimization_level)
|
||||||
|
.def_readwrite("intra_op_num_threads",
|
||||||
|
&OrtBackendOption::intra_op_num_threads)
|
||||||
|
.def_readwrite("inter_op_num_threads",
|
||||||
|
&OrtBackendOption::inter_op_num_threads)
|
||||||
|
.def_readwrite("execution_mode", &OrtBackendOption::execution_mode)
|
||||||
|
.def_readwrite("device", &OrtBackendOption::device)
|
||||||
|
.def_readwrite("device_id", &OrtBackendOption::device_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fastdeploy
|
@@ -23,12 +23,8 @@
|
|||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
|
|
||||||
struct PorosBackendOption {
|
struct PorosBackendOption {
|
||||||
#ifdef WITH_GPU
|
Device device = Device::CPU;
|
||||||
bool use_gpu = true;
|
int device_id = 0;
|
||||||
#else
|
|
||||||
bool use_gpu = false;
|
|
||||||
#endif
|
|
||||||
int gpu_id = 0;
|
|
||||||
bool long_to_int = true;
|
bool long_to_int = true;
|
||||||
// There is calculation precision in tf32 mode on A10, it can bring some
|
// There is calculation precision in tf32 mode on A10, it can bring some
|
||||||
// performance improvement, but there may be diff
|
// performance improvement, but there may be diff
|
||||||
|
37
fastdeploy/runtime/backends/poros/option_pybind.cc
Normal file
37
fastdeploy/runtime/backends/poros/option_pybind.cc
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/pybind/main.h"
|
||||||
|
#include "fastdeploy/runtime/backends/poros/option.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
|
||||||
|
void BindPorosOption(pybind11::module& m) {
|
||||||
|
pybind11::class_<PorosBackendOption>(m, "PorosBackendOption")
|
||||||
|
.def(pybind11::init())
|
||||||
|
.def_readwrite("long_to_int", &PorosBackendOption::long_to_int)
|
||||||
|
.def_readwrite("use_nvidia_tf32", &PorosBackendOption::use_nvidia_tf32)
|
||||||
|
.def_readwrite("unconst_ops_thres",
|
||||||
|
&PorosBackendOption::unconst_ops_thres)
|
||||||
|
.def_readwrite("prewarm_datatypes",
|
||||||
|
&PorosBackendOption::prewarm_datatypes)
|
||||||
|
.def_readwrite("enable_fp16", &PorosBackendOption::enable_fp16)
|
||||||
|
.def_readwrite("enable_int8", &PorosBackendOption::enable_int8)
|
||||||
|
.def_readwrite("is_dynamic", &PorosBackendOption::is_dynamic)
|
||||||
|
.def_readwrite("max_batch_size", &PorosBackendOption::max_batch_size)
|
||||||
|
.def_readwrite("max_workspace_size",
|
||||||
|
&PorosBackendOption::max_workspace_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace fastdeploy
|
@@ -43,11 +43,12 @@ std::vector<TensorInfo> PorosBackend::GetOutputInfos() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void PorosBackend::BuildOption(const PorosBackendOption& option) {
|
void PorosBackend::BuildOption(const PorosBackendOption& option) {
|
||||||
_options.device = option.use_gpu ? baidu::mirana::poros::Device::GPU
|
_options.device = (option.device == Device::GPU)
|
||||||
: baidu::mirana::poros::Device::CPU;
|
? baidu::mirana::poros::Device::GPU
|
||||||
|
: baidu::mirana::poros::Device::CPU;
|
||||||
_options.long_to_int = option.long_to_int;
|
_options.long_to_int = option.long_to_int;
|
||||||
_options.use_nvidia_tf32 = option.use_nvidia_tf32;
|
_options.use_nvidia_tf32 = option.use_nvidia_tf32;
|
||||||
_options.device_id = option.gpu_id;
|
_options.device_id = option.device_id;
|
||||||
_options.unconst_ops_thres = option.unconst_ops_thres;
|
_options.unconst_ops_thres = option.unconst_ops_thres;
|
||||||
_options.is_dynamic = option.is_dynamic;
|
_options.is_dynamic = option.is_dynamic;
|
||||||
_options.max_workspace_size = option.max_workspace_size;
|
_options.max_workspace_size = option.max_workspace_size;
|
||||||
@@ -67,7 +68,7 @@ bool PorosBackend::Compile(const std::string& model_file,
|
|||||||
torch::jit::Module mod;
|
torch::jit::Module mod;
|
||||||
mod = torch::jit::load(model_file);
|
mod = torch::jit::load(model_file);
|
||||||
mod.eval();
|
mod.eval();
|
||||||
if (option.use_gpu) {
|
if (option.device == Device::GPU) {
|
||||||
mod.to(at::kCUDA);
|
mod.to(at::kCUDA);
|
||||||
} else {
|
} else {
|
||||||
mod.to(at::kCPU);
|
mod.to(at::kCPU);
|
||||||
@@ -79,7 +80,7 @@ bool PorosBackend::Compile(const std::string& model_file,
|
|||||||
_numinputs = inputs.size() - 1;
|
_numinputs = inputs.size() - 1;
|
||||||
// FDTensor to at::Tensor
|
// FDTensor to at::Tensor
|
||||||
std::vector<std::vector<c10::IValue>> prewarm_datas;
|
std::vector<std::vector<c10::IValue>> prewarm_datas;
|
||||||
bool is_backend_cuda = option.use_gpu ? true : false;
|
bool is_backend_cuda = (option.device == Device::GPU);
|
||||||
for (size_t i = 0; i < prewarm_tensors.size(); ++i) {
|
for (size_t i = 0; i < prewarm_tensors.size(); ++i) {
|
||||||
std::vector<c10::IValue> prewarm_data;
|
std::vector<c10::IValue> prewarm_data;
|
||||||
for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) {
|
for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) {
|
||||||
@@ -121,73 +122,6 @@ bool PorosBackend::Compile(const std::string& model_file,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PorosBackend::InitFromTorchScript(const std::string& model_file,
|
|
||||||
const PorosBackendOption& option) {
|
|
||||||
if (initialized_) {
|
|
||||||
FDERROR << "PorosBackend is already initlized, cannot initialize again."
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (option.poros_file != "") {
|
|
||||||
std::ifstream fin(option.poros_file, std::ios::binary | std::ios::in);
|
|
||||||
if (fin) {
|
|
||||||
FDINFO << "Detect compiled Poros file in " << option.poros_file
|
|
||||||
<< ", will load it directly." << std::endl;
|
|
||||||
fin.close();
|
|
||||||
return InitFromPoros(option.poros_file, option);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
BuildOption(option);
|
|
||||||
torch::jit::Module mod;
|
|
||||||
mod = torch::jit::load(model_file);
|
|
||||||
mod.eval();
|
|
||||||
if (option.use_gpu) {
|
|
||||||
mod.to(at::kCUDA);
|
|
||||||
} else {
|
|
||||||
mod.to(at::kCPU);
|
|
||||||
}
|
|
||||||
// get inputs_nums and outputs_nums
|
|
||||||
auto graph = mod.get_method("forward").graph();
|
|
||||||
auto inputs = graph->inputs();
|
|
||||||
// remove self node
|
|
||||||
_numinputs = inputs.size() - 1;
|
|
||||||
auto outputs = graph->outputs();
|
|
||||||
_numoutputs = outputs.size();
|
|
||||||
_poros_module = baidu::mirana::poros::Compile(mod, _prewarm_datas, _options);
|
|
||||||
if (_poros_module == nullptr) {
|
|
||||||
FDERROR << "PorosBackend initlize Failed, try initialize again."
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
initialized_ = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool PorosBackend::InitFromPoros(const std::string& model_file,
|
|
||||||
const PorosBackendOption& option) {
|
|
||||||
if (initialized_) {
|
|
||||||
FDERROR << "PorosBackend is already initlized, cannot initialize again."
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
BuildOption(option);
|
|
||||||
_poros_module = baidu::mirana::poros::Load(model_file, _options);
|
|
||||||
if (_poros_module == nullptr) {
|
|
||||||
FDERROR << "PorosBackend initlize Failed, try initialize again."
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// get inputs_nums and outputs_nums
|
|
||||||
auto graph = _poros_module->get_method("forward").graph();
|
|
||||||
auto inputs = graph->inputs();
|
|
||||||
// remove self node
|
|
||||||
_numinputs = inputs.size() - 1;
|
|
||||||
auto outputs = graph->outputs();
|
|
||||||
_numoutputs = outputs.size();
|
|
||||||
initialized_ = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
|
bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
|
||||||
std::vector<FDTensor>* outputs, bool copy_to_fd) {
|
std::vector<FDTensor>* outputs, bool copy_to_fd) {
|
||||||
// Convert FD Tensor to PyTorch Tensor
|
// Convert FD Tensor to PyTorch Tensor
|
||||||
@@ -238,4 +172,4 @@ bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
@@ -51,13 +51,6 @@ class PorosBackend : public BaseBackend {
|
|||||||
|
|
||||||
void BuildOption(const PorosBackendOption& option);
|
void BuildOption(const PorosBackendOption& option);
|
||||||
|
|
||||||
bool
|
|
||||||
InitFromTorchScript(const std::string& model_file,
|
|
||||||
const PorosBackendOption& option = PorosBackendOption());
|
|
||||||
|
|
||||||
bool InitFromPoros(const std::string& model_file,
|
|
||||||
const PorosBackendOption& option = PorosBackendOption());
|
|
||||||
|
|
||||||
bool Compile(const std::string& model_file,
|
bool Compile(const std::string& model_file,
|
||||||
std::vector<std::vector<FDTensor>>& prewarm_tensors,
|
std::vector<std::vector<FDTensor>>& prewarm_tensors,
|
||||||
const PorosBackendOption& option = PorosBackendOption());
|
const PorosBackendOption& option = PorosBackendOption());
|
||||||
|
129
fastdeploy/runtime/option_pybind.cc
Normal file
129
fastdeploy/runtime/option_pybind.cc
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
// Cropyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/pybind/main.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
|
||||||
|
void BindLiteOption(pybind11::module& m);
|
||||||
|
void BindOpenVINOOption(pybind11::module& m);
|
||||||
|
void BindOrtOption(pybind11::module& m);
|
||||||
|
void BindPorosOption(pybind11::module& m);
|
||||||
|
|
||||||
|
void BindOption(pybind11::module& m) {
|
||||||
|
BindLiteOption(m);
|
||||||
|
BindOpenVINOOption(m);
|
||||||
|
BindOrtOption(m);
|
||||||
|
BindPorosOption(m);
|
||||||
|
|
||||||
|
pybind11::class_<RuntimeOption>(m, "RuntimeOption")
|
||||||
|
.def(pybind11::init())
|
||||||
|
.def("set_model_path", &RuntimeOption::SetModelPath)
|
||||||
|
.def("set_model_buffer", &RuntimeOption::SetModelBuffer)
|
||||||
|
.def("use_gpu", &RuntimeOption::UseGpu)
|
||||||
|
.def("use_cpu", &RuntimeOption::UseCpu)
|
||||||
|
.def("use_rknpu2", &RuntimeOption::UseRKNPU2)
|
||||||
|
.def("use_sophgo", &RuntimeOption::UseSophgo)
|
||||||
|
.def("use_ascend", &RuntimeOption::UseAscend)
|
||||||
|
.def("use_kunlunxin", &RuntimeOption::UseKunlunXin)
|
||||||
|
.def_readwrite("paddle_lite_option", &RuntimeOption::paddle_lite_option)
|
||||||
|
.def_readwrite("openvino_option", &RuntimeOption::openvino_option)
|
||||||
|
.def_readwrite("ort_option", &RuntimeOption::ort_option)
|
||||||
|
.def_readwrite("poros_option", &RuntimeOption::poros_option)
|
||||||
|
.def("set_external_stream", &RuntimeOption::SetExternalStream)
|
||||||
|
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
|
||||||
|
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
|
||||||
|
.def("use_poros_backend", &RuntimeOption::UsePorosBackend)
|
||||||
|
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
|
||||||
|
.def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
|
||||||
|
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
|
||||||
|
.def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
|
||||||
|
.def("use_lite_backend", &RuntimeOption::UseLiteBackend)
|
||||||
|
.def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames)
|
||||||
|
.def("set_lite_context_properties",
|
||||||
|
&RuntimeOption::SetLiteContextProperties)
|
||||||
|
.def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir)
|
||||||
|
.def("set_lite_dynamic_shape_info",
|
||||||
|
&RuntimeOption::SetLiteDynamicShapeInfo)
|
||||||
|
.def("set_lite_subgraph_partition_path",
|
||||||
|
&RuntimeOption::SetLiteSubgraphPartitionPath)
|
||||||
|
.def("set_lite_mixed_precision_quantization_config_path",
|
||||||
|
&RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
|
||||||
|
.def("set_lite_subgraph_partition_config_buffer",
|
||||||
|
&RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
|
||||||
|
.def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
|
||||||
|
.def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
|
||||||
|
.def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
|
||||||
|
.def("set_openvino_cpu_operators",
|
||||||
|
&RuntimeOption::SetOpenVINOCpuOperators)
|
||||||
|
.def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo)
|
||||||
|
.def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo)
|
||||||
|
.def("set_paddle_mkldnn_cache_size",
|
||||||
|
&RuntimeOption::SetPaddleMKLDNNCacheSize)
|
||||||
|
.def("enable_lite_fp16", &RuntimeOption::EnableLiteFP16)
|
||||||
|
.def("disable_lite_fp16", &RuntimeOption::DisableLiteFP16)
|
||||||
|
.def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode)
|
||||||
|
.def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
|
||||||
|
.def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize)
|
||||||
|
.def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize)
|
||||||
|
.def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt)
|
||||||
|
.def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
|
||||||
|
.def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
|
||||||
|
.def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
|
||||||
|
.def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory)
|
||||||
|
.def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory)
|
||||||
|
.def("enable_paddle_trt_collect_shape",
|
||||||
|
&RuntimeOption::EnablePaddleTrtCollectShape)
|
||||||
|
.def("disable_paddle_trt_collect_shape",
|
||||||
|
&RuntimeOption::DisablePaddleTrtCollectShape)
|
||||||
|
.def("use_ipu", &RuntimeOption::UseIpu)
|
||||||
|
.def("set_ipu_config", &RuntimeOption::SetIpuConfig)
|
||||||
|
.def("delete_paddle_backend_pass",
|
||||||
|
&RuntimeOption::DeletePaddleBackendPass)
|
||||||
|
.def("enable_profiling", &RuntimeOption::EnableProfiling)
|
||||||
|
.def("disable_profiling", &RuntimeOption::DisableProfiling)
|
||||||
|
.def("disable_paddle_trt_ops", &RuntimeOption::DisablePaddleTrtOPs)
|
||||||
|
.def_readwrite("model_file", &RuntimeOption::model_file)
|
||||||
|
.def_readwrite("params_file", &RuntimeOption::params_file)
|
||||||
|
.def_readwrite("model_format", &RuntimeOption::model_format)
|
||||||
|
.def_readwrite("backend", &RuntimeOption::backend)
|
||||||
|
.def_readwrite("external_stream", &RuntimeOption::external_stream_)
|
||||||
|
.def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
|
||||||
|
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
|
||||||
|
.def_readwrite("device_id", &RuntimeOption::device_id)
|
||||||
|
.def_readwrite("device", &RuntimeOption::device)
|
||||||
|
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
|
||||||
|
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
|
||||||
|
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
|
||||||
|
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
|
||||||
|
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
|
||||||
|
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
|
||||||
|
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
|
||||||
|
.def_readwrite("trt_max_workspace_size",
|
||||||
|
&RuntimeOption::trt_max_workspace_size)
|
||||||
|
.def_readwrite("ipu_device_num", &RuntimeOption::ipu_device_num)
|
||||||
|
.def_readwrite("ipu_micro_batch_size",
|
||||||
|
&RuntimeOption::ipu_micro_batch_size)
|
||||||
|
.def_readwrite("ipu_enable_pipelining",
|
||||||
|
&RuntimeOption::ipu_enable_pipelining)
|
||||||
|
.def_readwrite("ipu_batches_per_step",
|
||||||
|
&RuntimeOption::ipu_batches_per_step)
|
||||||
|
.def_readwrite("ipu_enable_fp16", &RuntimeOption::ipu_enable_fp16)
|
||||||
|
.def_readwrite("ipu_replica_num", &RuntimeOption::ipu_replica_num)
|
||||||
|
.def_readwrite("ipu_available_memory_proportion",
|
||||||
|
&RuntimeOption::ipu_available_memory_proportion)
|
||||||
|
.def_readwrite("ipu_enable_half_partial",
|
||||||
|
&RuntimeOption::ipu_enable_half_partial);
|
||||||
|
}
|
||||||
|
} // namespace fastdeploy
|
@@ -495,25 +495,24 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
|
|||||||
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
|
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
|
||||||
const RuntimeOption& _option) {
|
const RuntimeOption& _option) {
|
||||||
#ifdef ENABLE_POROS_BACKEND
|
#ifdef ENABLE_POROS_BACKEND
|
||||||
option = _option;
|
|
||||||
auto poros_option = PorosBackendOption();
|
|
||||||
poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
|
|
||||||
poros_option.gpu_id = option.device_id;
|
|
||||||
poros_option.long_to_int = option.long_to_int;
|
|
||||||
poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
|
|
||||||
poros_option.unconst_ops_thres = option.unconst_ops_thres;
|
|
||||||
poros_option.poros_file = option.poros_file;
|
|
||||||
poros_option.is_dynamic = option.is_dynamic;
|
|
||||||
poros_option.enable_fp16 = option.trt_enable_fp16;
|
|
||||||
poros_option.max_batch_size = option.trt_max_batch_size;
|
|
||||||
poros_option.max_workspace_size = option.trt_max_workspace_size;
|
|
||||||
FDASSERT(
|
FDASSERT(
|
||||||
option.model_format == ModelFormat::TORCHSCRIPT,
|
option.model_format == ModelFormat::TORCHSCRIPT,
|
||||||
"PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
|
"PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
|
||||||
|
if (option.device != Device::CPU && option.device != Device::GPU) {
|
||||||
|
FDERROR << "PorosBackend only supports CPU/GPU, but now its "
|
||||||
|
<< option.device << "." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
option.poros_option.device = option.device;
|
||||||
|
option.poros_option.device_id = option.device_id;
|
||||||
|
option.poros_option.enable_fp16 = option.trt_enable_fp16;
|
||||||
|
option.poros_option.max_batch_size = option.trt_max_batch_size;
|
||||||
|
option.poros_option.max_workspace_size = option.trt_max_workspace_size;
|
||||||
backend_ = utils::make_unique<PorosBackend>();
|
backend_ = utils::make_unique<PorosBackend>();
|
||||||
auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
|
auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
|
||||||
FDASSERT(
|
FDASSERT(
|
||||||
casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
|
casted_backend->Compile(option.model_file, prewarm_tensors,
|
||||||
|
option.poros_option),
|
||||||
"Load model from Torchscript failed while initliazing PorosBackend.");
|
"Load model from Torchscript failed while initliazing PorosBackend.");
|
||||||
#else
|
#else
|
||||||
FDASSERT(false,
|
FDASSERT(false,
|
||||||
|
@@ -413,12 +413,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
|
|||||||
// ======Only for PaddleTrt Backend=======
|
// ======Only for PaddleTrt Backend=======
|
||||||
std::vector<std::string> trt_disabled_ops_{};
|
std::vector<std::string> trt_disabled_ops_{};
|
||||||
|
|
||||||
// ======Only for Poros Backend=======
|
PorosBackendOption poros_option;
|
||||||
bool is_dynamic = false;
|
|
||||||
bool long_to_int = true;
|
|
||||||
bool use_nvidia_tf32 = false;
|
|
||||||
int unconst_ops_thres = -1;
|
|
||||||
std::string poros_file = "";
|
|
||||||
|
|
||||||
OpenVINOBackendOption openvino_option;
|
OpenVINOBackendOption openvino_option;
|
||||||
|
|
||||||
|
142
python/fastdeploy/runtime.py
Executable file → Normal file
142
python/fastdeploy/runtime.py
Executable file → Normal file
@@ -147,7 +147,7 @@ class Runtime:
|
|||||||
def get_profile_time(self):
|
def get_profile_time(self):
|
||||||
"""Get profile time of Runtime after the profile process is done.
|
"""Get profile time of Runtime after the profile process is done.
|
||||||
"""
|
"""
|
||||||
return self._runtime.get_profile_time()
|
return self._runtime.get_profile_time()
|
||||||
|
|
||||||
|
|
||||||
class RuntimeOption:
|
class RuntimeOption:
|
||||||
@@ -160,65 +160,6 @@ class RuntimeOption:
|
|||||||
|
|
||||||
self._option = C.RuntimeOption()
|
self._option = C.RuntimeOption()
|
||||||
|
|
||||||
@property
|
|
||||||
def is_dynamic(self):
|
|
||||||
"""Only for Poros backend
|
|
||||||
|
|
||||||
:param value: (bool)Whether to enable dynamic shape, default False
|
|
||||||
"""
|
|
||||||
return self._option.is_dynamic
|
|
||||||
|
|
||||||
@property
|
|
||||||
def unconst_ops_thres(self):
|
|
||||||
"""Only for Poros backend
|
|
||||||
|
|
||||||
:param value: (int)Minimum number of subgraph OPs, default 10
|
|
||||||
"""
|
|
||||||
return self._option.unconst_ops_thres
|
|
||||||
|
|
||||||
@property
|
|
||||||
def long_to_int(self):
|
|
||||||
"""Only for Poros backend
|
|
||||||
|
|
||||||
:param value: (bool)Whether to convert long dtype to int dtype, default True
|
|
||||||
"""
|
|
||||||
return self._option.long_to_int
|
|
||||||
|
|
||||||
@property
|
|
||||||
def use_nvidia_tf32(self):
|
|
||||||
"""Only for Poros backend
|
|
||||||
|
|
||||||
:param value: (bool)The calculation accuracy of tf32 mode exists on the A card, which can bring some performance improvements, default False
|
|
||||||
"""
|
|
||||||
return self._option.use_nvidia_tf32
|
|
||||||
|
|
||||||
@is_dynamic.setter
|
|
||||||
def is_dynamic(self, value):
|
|
||||||
assert isinstance(
|
|
||||||
value, bool), "The value to set `is_dynamic` must be type of bool."
|
|
||||||
self._option.is_dynamic = value
|
|
||||||
|
|
||||||
@unconst_ops_thres.setter
|
|
||||||
def unconst_ops_thres(self, value):
|
|
||||||
assert isinstance(
|
|
||||||
value,
|
|
||||||
int), "The value to set `unconst_ops_thres` must be type of int."
|
|
||||||
self._option.unconst_ops_thres = value
|
|
||||||
|
|
||||||
@long_to_int.setter
|
|
||||||
def long_to_int(self, value):
|
|
||||||
assert isinstance(
|
|
||||||
value,
|
|
||||||
bool), "The value to set `long_to_int` must be type of bool."
|
|
||||||
self._option.long_to_int = value
|
|
||||||
|
|
||||||
@use_nvidia_tf32.setter
|
|
||||||
def use_nvidia_tf32(self, value):
|
|
||||||
assert isinstance(
|
|
||||||
value,
|
|
||||||
bool), "The value to set `use_nvidia_tf32` must be type of bool."
|
|
||||||
self._option.use_nvidia_tf32 = value
|
|
||||||
|
|
||||||
def set_model_path(self,
|
def set_model_path(self,
|
||||||
model_path,
|
model_path,
|
||||||
params_path="",
|
params_path="",
|
||||||
@@ -322,6 +263,9 @@ class RuntimeOption:
|
|||||||
|
|
||||||
:param level: (int)Optimization level, -1 means the default setting
|
:param level: (int)Optimization level, -1 means the default setting
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_ort_graph_opt_level` will be deprecated in v1.2.0, please use `RuntimeOption.graph_optimize_level = 99` instead."
|
||||||
|
)
|
||||||
return self._option.set_ort_graph_opt_level(level)
|
return self._option.set_ort_graph_opt_level(level)
|
||||||
|
|
||||||
def use_paddle_backend(self):
|
def use_paddle_backend(self):
|
||||||
@@ -364,29 +308,36 @@ class RuntimeOption:
|
|||||||
"""
|
"""
|
||||||
return self.use_lite_backend()
|
return self.use_lite_backend()
|
||||||
|
|
||||||
def set_lite_device_names(self, device_names):
|
|
||||||
"""Set nnadapter device name for Paddle Lite backend.
|
|
||||||
"""
|
|
||||||
return self._option.set_lite_device_names(device_names)
|
|
||||||
|
|
||||||
def set_lite_context_properties(self, context_properties):
|
def set_lite_context_properties(self, context_properties):
|
||||||
"""Set nnadapter context properties for Paddle Lite backend.
|
"""Set nnadapter context properties for Paddle Lite backend.
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_lite_context_properties` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_context_properties = ...` instead."
|
||||||
|
)
|
||||||
return self._option.set_lite_context_properties(context_properties)
|
return self._option.set_lite_context_properties(context_properties)
|
||||||
|
|
||||||
def set_lite_model_cache_dir(self, model_cache_dir):
|
def set_lite_model_cache_dir(self, model_cache_dir):
|
||||||
"""Set nnadapter model cache dir for Paddle Lite backend.
|
"""Set nnadapter model cache dir for Paddle Lite backend.
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_lite_model_cache_dir` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_model_cache_dir = ...` instead."
|
||||||
|
)
|
||||||
return self._option.set_lite_model_cache_dir(model_cache_dir)
|
return self._option.set_lite_model_cache_dir(model_cache_dir)
|
||||||
|
|
||||||
def set_lite_dynamic_shape_info(self, dynamic_shape_info):
|
def set_lite_dynamic_shape_info(self, dynamic_shape_info):
|
||||||
""" Set nnadapter dynamic shape info for Paddle Lite backend.
|
""" Set nnadapter dynamic shape info for Paddle Lite backend.
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_lite_dynamic_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_dynamic_shape_info = ...` instead."
|
||||||
|
)
|
||||||
return self._option.set_lite_dynamic_shape_info(dynamic_shape_info)
|
return self._option.set_lite_dynamic_shape_info(dynamic_shape_info)
|
||||||
|
|
||||||
def set_lite_subgraph_partition_path(self, subgraph_partition_path):
|
def set_lite_subgraph_partition_path(self, subgraph_partition_path):
|
||||||
""" Set nnadapter subgraph partition path for Paddle Lite backend.
|
""" Set nnadapter subgraph partition path for Paddle Lite backend.
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_lite_subgraph_partition_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_path = ...` instead."
|
||||||
|
)
|
||||||
return self._option.set_lite_subgraph_partition_path(
|
return self._option.set_lite_subgraph_partition_path(
|
||||||
subgraph_partition_path)
|
subgraph_partition_path)
|
||||||
|
|
||||||
@@ -394,6 +345,9 @@ class RuntimeOption:
|
|||||||
subgraph_partition_buffer):
|
subgraph_partition_buffer):
|
||||||
""" Set nnadapter subgraph partition buffer for Paddle Lite backend.
|
""" Set nnadapter subgraph partition buffer for Paddle Lite backend.
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_lite_subgraph_partition_buffer` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = ...` instead."
|
||||||
|
)
|
||||||
return self._option.set_lite_subgraph_partition_config_buffer(
|
return self._option.set_lite_subgraph_partition_config_buffer(
|
||||||
subgraph_partition_buffer)
|
subgraph_partition_buffer)
|
||||||
|
|
||||||
@@ -401,6 +355,9 @@ class RuntimeOption:
|
|||||||
self, mixed_precision_quantization_config_path):
|
self, mixed_precision_quantization_config_path):
|
||||||
""" Set nnadapter mixed precision quantization config path for Paddle Lite backend.
|
""" Set nnadapter mixed precision quantization config path for Paddle Lite backend.
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_lite_mixed_precision_quantization_config_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = ...` instead."
|
||||||
|
)
|
||||||
return self._option.set_lite_mixed_precision_quantization_config_path(
|
return self._option.set_lite_mixed_precision_quantization_config_path(
|
||||||
mixed_precision_quantization_config_path)
|
mixed_precision_quantization_config_path)
|
||||||
|
|
||||||
@@ -411,21 +368,33 @@ class RuntimeOption:
|
|||||||
|
|
||||||
def set_openvino_device(self, name="CPU"):
|
def set_openvino_device(self, name="CPU"):
|
||||||
"""Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
|
"""Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
|
||||||
|
This interface is deprecated, please use `RuntimeOption.openvino_option.set_device` instead.
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_openvino_device` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_device` instead."
|
||||||
|
)
|
||||||
return self._option.set_openvino_device(name)
|
return self._option.set_openvino_device(name)
|
||||||
|
|
||||||
def set_openvino_shape_info(self, shape_info):
|
def set_openvino_shape_info(self, shape_info):
|
||||||
"""Set shape information of the models' inputs, used for GPU to fix the shape
|
"""Set shape information of the models' inputs, used for GPU to fix the shape
|
||||||
|
This interface is deprecated, please use `RuntimeOption.openvino_option.set_shape_info` instead.
|
||||||
|
|
||||||
:param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]}
|
:param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g {"image": [1, 3, 640, 640], "scale_factor": [1, 2]}
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_openvino_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_shape_info` instead."
|
||||||
|
)
|
||||||
return self._option.set_openvino_shape_info(shape_info)
|
return self._option.set_openvino_shape_info(shape_info)
|
||||||
|
|
||||||
def set_openvino_cpu_operators(self, operators):
|
def set_openvino_cpu_operators(self, operators):
|
||||||
"""While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU
|
"""While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU
|
||||||
|
This interface is deprecated, please use `RuntimeOption.openvino_option.set_cpu_operators` instead.
|
||||||
|
|
||||||
:param operators: (list of string)list of operators' name, e.g ["MulticlasNms"]
|
:param operators: (list of string)list of operators' name, e.g ["MulticlasNms"]
|
||||||
"""
|
"""
|
||||||
|
logging.warning(
|
||||||
|
"`RuntimeOption.set_openvino_cpu_operators` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_cpu_operators` instead."
|
||||||
|
)
|
||||||
return self._option.set_openvino_cpu_operators(operators)
|
return self._option.set_openvino_cpu_operators(operators)
|
||||||
|
|
||||||
def enable_paddle_log_info(self):
|
def enable_paddle_log_info(self):
|
||||||
@@ -557,15 +526,45 @@ class RuntimeOption:
|
|||||||
available_memory_proportion,
|
available_memory_proportion,
|
||||||
enable_half_partial)
|
enable_half_partial)
|
||||||
|
|
||||||
def enable_profiling(self,
|
@property
|
||||||
inclue_h2d_d2h=False,
|
def poros_option(self):
|
||||||
repeat=100, warmup=50):
|
"""Get PorosBackendOption object to configure Poros backend
|
||||||
|
|
||||||
|
:return PorosBackendOption
|
||||||
|
"""
|
||||||
|
return self._option.poros_option
|
||||||
|
|
||||||
|
@property
|
||||||
|
def paddle_lite_option(self):
|
||||||
|
"""Get LiteBackendOption object to configure Paddle Lite backend
|
||||||
|
|
||||||
|
:return LiteBackendOption
|
||||||
|
"""
|
||||||
|
return self._option.paddle_lite_option
|
||||||
|
|
||||||
|
@property
|
||||||
|
def openvino_option(self):
|
||||||
|
"""Get OpenVINOOption object to configure OpenVINO backend
|
||||||
|
|
||||||
|
:return OpenVINOOption
|
||||||
|
"""
|
||||||
|
return self._option.openvino_option
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ort_option(self):
|
||||||
|
"""Get OrtBackendOption object to configure ONNX Runtime backend
|
||||||
|
|
||||||
|
:return OrtBackendOption
|
||||||
|
"""
|
||||||
|
return self._option.ort_option
|
||||||
|
|
||||||
|
def enable_profiling(self, inclue_h2d_d2h=False, repeat=100, warmup=50):
|
||||||
"""Set the profile mode as 'true'.
|
"""Set the profile mode as 'true'.
|
||||||
:param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.
|
:param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.
|
||||||
:param repeat Repeat times for runtime inference.
|
:param repeat Repeat times for runtime inference.
|
||||||
:param warmup Warmup times for runtime inference.
|
:param warmup Warmup times for runtime inference.
|
||||||
"""
|
"""
|
||||||
return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup)
|
return self._option.enable_profiling(inclue_h2d_d2h, repeat, warmup)
|
||||||
|
|
||||||
def disable_profiling(self):
|
def disable_profiling(self):
|
||||||
"""Set the profile mode as 'false'.
|
"""Set the profile mode as 'false'.
|
||||||
@@ -580,8 +579,7 @@ class RuntimeOption:
|
|||||||
continue
|
continue
|
||||||
if hasattr(getattr(self._option, attr), "__call__"):
|
if hasattr(getattr(self._option, attr), "__call__"):
|
||||||
continue
|
continue
|
||||||
message += " {} : {}\t\n".format(attr,
|
message += " {} : {}\t\n".format(attr, getattr(self._option, attr))
|
||||||
getattr(self._option, attr))
|
|
||||||
message.strip("\n")
|
message.strip("\n")
|
||||||
message += ")"
|
message += ")"
|
||||||
return message
|
return message
|
||||||
|
Reference in New Issue
Block a user