[Other] Deprecate some option api and parameters (#1243)

* Optimize Poros backend

* fix error

* Add more pybind

* fix conflicts

* add some deprecate notices

* [Other] Deprecate some apis in RuntimeOption (#1240)

* Deprecate more options

* modify serving

* Update option.h

* fix tensorrt error

* Update option_pybind.cc

* Update option_pybind.cc

* Fix error in serving

* fix word spell error
Author: Jason
Date: 2023-02-07 17:57:46 +08:00
Committed by: GitHub
Parent: a18cc0f94c
Commit: 713afe7f1c

15 changed files with 380 additions and 229 deletions

@@ -154,6 +154,8 @@ class RuntimeOption:
"""Options for FastDeploy Runtime.
"""
__slots__ = ["_option"]
def __init__(self):
"""Initialize a FastDeploy RuntimeOption object.
"""
@@ -266,7 +268,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_ort_graph_opt_level` will be deprecated in v1.2.0, please use `RuntimeOption.graph_optimize_level = 99` instead."
)
-return self._option.set_ort_graph_opt_level(level)
+self._option.ort_option.graph_optimize_level = level
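The hunk above is the template for every migration in this commit: the deprecated method body shrinks to a warning plus a one-line delegation to the backend-specific option object. A minimal before/after sketch, assuming fastdeploy is installed and imported as fd:

    import fastdeploy as fd

    option = fd.RuntimeOption()

    # Deprecated spelling: still works until v1.2.0, but logs the warning above.
    option.set_ort_graph_opt_level(99)

    # New spelling: assign the property on the ONNX Runtime option object.
    option.ort_option.graph_optimize_level = 99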
def use_paddle_backend(self):
"""Use Paddle Inference backend, support inference Paddle model on CPU/Nvidia GPU.
@@ -314,7 +316,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_lite_context_properties` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_context_properties = ...` instead."
)
-return self._option.set_lite_context_properties(context_properties)
+self._option.paddle_lite_option.nnadapter_context_properties = context_properties
def set_lite_model_cache_dir(self, model_cache_dir):
"""Set nnadapter model cache dir for Paddle Lite backend.
@@ -322,7 +324,8 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_lite_model_cache_dir` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_model_cache_dir = ...` instead."
)
-return self._option.set_lite_model_cache_dir(model_cache_dir)
+self._option.paddle_lite_option.nnadapter_model_cache_dir = model_cache_dir
def set_lite_dynamic_shape_info(self, dynamic_shape_info):
""" Set nnadapter dynamic shape info for Paddle Lite backend.
@@ -330,7 +333,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_lite_dynamic_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_dynamic_shape_info = ...` instead."
)
-return self._option.set_lite_dynamic_shape_info(dynamic_shape_info)
+self._option.paddle_lite_option.nnadapter_dynamic_shape_info = dynamic_shape_info
def set_lite_subgraph_partition_path(self, subgraph_partition_path):
""" Set nnadapter subgraph partition path for Paddle Lite backend.
@@ -338,8 +341,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_lite_subgraph_partition_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_path = ...` instead."
)
-return self._option.set_lite_subgraph_partition_path(
-    subgraph_partition_path)
+self._option.paddle_lite_option.nnadapter_subgraph_partition_config_path = subgraph_partition_path
def set_lite_subgraph_partition_config_buffer(self,
subgraph_partition_buffer):
@@ -348,8 +350,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_lite_subgraph_partition_buffer` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = ...` instead."
)
-return self._option.set_lite_subgraph_partition_config_buffer(
-    subgraph_partition_buffer)
+self._option.paddle_lite_option.nnadapter_subgraph_partition_config_buffer = subgraph_partition_buffer
def set_lite_mixed_precision_quantization_config_path(
self, mixed_precision_quantization_config_path):
@@ -358,8 +359,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_lite_mixed_precision_quantization_config_path` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = ...` instead."
)
-return self._option.set_lite_mixed_precision_quantization_config_path(
-    mixed_precision_quantization_config_path)
+self._option.paddle_lite_option.nnadapter_mixed_precision_quantization_config_path = mixed_precision_quantization_config_path
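All six nnadapter setters above follow the identical pattern, so they migrate the same way: drop the set_lite_* call and assign the matching nnadapter_* property on paddle_lite_option. A consolidated sketch of the new style (the paths and the properties string are illustrative placeholders, not values from this commit):

    import fastdeploy as fd

    option = fd.RuntimeOption()
    lite = option.paddle_lite_option

    lite.nnadapter_model_cache_dir = "./nnadapter_cache"               # placeholder dir
    lite.nnadapter_subgraph_partition_config_path = "./partition.cfg"  # placeholder path
    lite.nnadapter_context_properties = "KEY=VALUE"                    # placeholder string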
def set_paddle_mkldnn(self, use_mkldnn=True):
"""Enable/Disable MKLDNN while using Paddle Inference backend, mkldnn is enabled by default.
@@ -373,7 +373,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_openvino_device` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_device` instead."
)
-return self._option.set_openvino_device(name)
+self._option.openvino_option.set_device(name)
def set_openvino_shape_info(self, shape_info):
"""Set shape information of the models' inputs, used for GPU to fix the shape
@@ -384,7 +384,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_openvino_shape_info` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_shape_info` instead."
)
-return self._option.set_openvino_shape_info(shape_info)
+self._option.openvino_option.set_shape_info(shape_info)
def set_openvino_cpu_operators(self, operators):
"""While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU
@@ -395,7 +395,7 @@ class RuntimeOption:
logging.warning(
"`RuntimeOption.set_openvino_cpu_operators` will be deprecated in v1.2.0, please use `RuntimeOption.openvino_option.set_cpu_operators` instead."
)
-return self._option.set_openvino_cpu_operators(operators)
+self._option.openvino_option.set_cpu_operators(operators)
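The OpenVINO replacements differ from the Paddle Lite ones in that they stay method calls, just relocated onto openvino_option. A short sketch with illustrative arguments (the device name, shape dict, and operator list are examples, not values from this commit):

    import fastdeploy as fd

    option = fd.RuntimeOption()

    option.openvino_option.set_device("GPU")                        # example device
    option.openvino_option.set_shape_info({"x": [1, 3, 224, 224]})  # example shapes
    option.openvino_option.set_cpu_operators(["MulticlassNms"])     # example op list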
def enable_paddle_log_info(self):
"""Enable print out the debug log information while using Paddle Inference backend, the log information is disabled by default.
@@ -415,17 +415,26 @@ class RuntimeOption:
def enable_lite_fp16(self):
"""Enable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default.
"""
-return self._option.enable_lite_fp16()
+logging.warning(
+    "`RuntimeOption.enable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = True` instead."
+)
+self._option.paddle_lite_option.enable_fp16 = True
def disable_lite_fp16(self):
"""Disable half precision inference while using Paddle Lite backend on ARM CPU, fp16 is disabled by default.
"""
-return self._option.disable_lite_fp16()
+logging.warning(
+    "`RuntimeOption.disable_lite_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.enable_fp16 = False` instead."
+)
+self._option.paddle_lite_option.enable_fp16 = False
def set_lite_power_mode(self, mode):
"""Set POWER mode while using Paddle Lite backend on ARM CPU.
"""
-return self._option.set_lite_power_mode(mode)
+logging.warning(
+    "`RuntimeOption.set_lite_power_mode` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_lite_option.power_mode = {}` instead.".
+    format(mode))
+self._option.paddle_lite_option.power_mode = mode
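Worth noting in the three hunks above: the enable/disable method pair collapses into a single boolean property, and the power mode becomes a plain assignment. A minimal sketch (the mode value is an illustrative placeholder):

    import fastdeploy as fd

    option = fd.RuntimeOption()

    # One property replaces enable_lite_fp16()/disable_lite_fp16().
    option.paddle_lite_option.enable_fp16 = True

    # Direct assignment replaces set_lite_power_mode(mode).
    option.paddle_lite_option.power_mode = 0  # placeholder; pass your previous `mode`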
def set_trt_input_shape(self,
tensor_name,
@@ -439,30 +448,42 @@ class RuntimeOption:
:param opt_shape: (list of int)Optimal shape of the input, often set to the most common input shape; if set to None, it will keep the same value as min_shape
:param max_shape: (list of int)Maximum shape of the input, e.g [8, 3, 224, 224]; if set to None, it will keep the same value as min_shape
"""
+logging.warning(
+    "`RuntimeOption.set_trt_input_shape` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.set_shape()` instead."
+)
if opt_shape is None and max_shape is None:
    opt_shape = min_shape
    max_shape = min_shape
else:
    assert opt_shape is not None and max_shape is not None, "Set min_shape only, or set min_shape, opt_shape, max_shape both."
-return self._option.set_trt_input_shape(tensor_name, min_shape,
-                                        opt_shape, max_shape)
+return self._option.trt_option.set_shape(tensor_name, min_shape,
+                                         opt_shape, max_shape)
def set_trt_cache_file(self, cache_file_path):
"""Set a cache file path while using TensorRT backend. While loading a Paddle/ONNX model with set_trt_cache_file("./tensorrt_cache/model.trt"), if file `./tensorrt_cache/model.trt` exists, it will skip building tensorrt engine and load the cache file directly; if file `./tensorrt_cache/model.trt` doesn't exist, it will building tensorrt engine and save the engine as binary string to the cache file.
:param cache_file_path: (str)Path of tensorrt cache file
"""
-return self._option.set_trt_cache_file(cache_file_path)
+logging.warning(
+    "`RuntimeOption.set_trt_cache_file` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.serialize_file = {}` instead.".
+    format(cache_file_path))
+self._option.trt_option.serialize_file = cache_file_path
def enable_trt_fp16(self):
"""Enable half precision inference while using TensorRT backend, notice that not all the Nvidia GPU support FP16, in those cases, will fallback to FP32 inference.
"""
-return self._option.enable_trt_fp16()
+logging.warning(
+    "`RuntimeOption.enable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = True` instead."
+)
+self._option.trt_option.enable_fp16 = True
def disable_trt_fp16(self):
"""Disable half precision inference while suing TensorRT backend.
"""
-return self._option.disable_trt_fp16()
+logging.warning(
+    "`RuntimeOption.disable_trt_fp16` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.enable_fp16 = False` instead."
+)
+self._option.trt_option.enable_fp16 = False
def enable_pinned_memory(self):
"""Enable pinned memory. Pinned memory can be utilized to speedup the data transfer between CPU and GPU. Currently it's only suppurted in TRT backend and Paddle Inference backend.
@@ -482,12 +503,18 @@ class RuntimeOption:
def set_trt_max_workspace_size(self, trt_max_workspace_size):
"""Set max workspace size while using TensorRT backend.
"""
-return self._option.set_trt_max_workspace_size(trt_max_workspace_size)
+logging.warning(
+    "`RuntimeOption.set_trt_max_workspace_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_workspace_size = {}` instead.".
+    format(trt_max_workspace_size))
+self._option.trt_option.max_workspace_size = trt_max_workspace_size
def set_trt_max_batch_size(self, trt_max_batch_size):
"""Set max batch size while using TensorRT backend.
"""
-return self._option.set_trt_max_batch_size(trt_max_batch_size)
+logging.warning(
+    "`RuntimeOption.set_trt_max_batch_size` will be deprecated in v1.2.0, please use `RuntimeOption.trt_option.max_batch_size = {}` instead.".
+    format(trt_max_batch_size))
+self._option.trt_option.max_batch_size = trt_max_batch_size
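The remaining TensorRT knobs all become plain properties on trt_option. A consolidated sketch of the new spellings (the cache path mirrors the docstring example above; the sizes are illustrative):

    import fastdeploy as fd

    option = fd.RuntimeOption()

    option.trt_option.serialize_file = "./tensorrt_cache/model.trt"  # engine cache file
    option.trt_option.enable_fp16 = True             # replaces enable/disable_trt_fp16()
    option.trt_option.max_workspace_size = 1 << 30   # bytes; illustrative 1 GiB
    option.trt_option.max_batch_size = 8             # illustrative batch size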
def enable_paddle_trt_collect_shape(self):
"""Enable collect subgraph shape information while using Paddle Inference with TensorRT
@@ -558,6 +585,14 @@ class RuntimeOption:
"""
return self._option.ort_option
+@property
+def trt_option(self):
+    """Get TrtBackendOption object to configure TensorRT backend
+    :return TrtBackendOption
+    """
+    return self._option.trt_option
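This new property is what makes the assignments above reachable from Python: RuntimeOption exposes the pybind-wrapped TrtBackendOption directly, mirroring the ort_option property just before it. A short usage sketch:

    import fastdeploy as fd

    option = fd.RuntimeOption()
    trt = option.trt_option        # TrtBackendOption bound from C++
    trt.enable_fp16 = True
    trt.serialize_file = "./tensorrt_cache/model.trt"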
def enable_profiling(self, inclue_h2d_d2h=False, repeat=100, warmup=50):
"""Set the profile mode as 'true'.
:param inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.