diff --git a/fastdeploy/backends/lite/lite_backend.h b/fastdeploy/backends/lite/lite_backend.h
index b5d0f4b93..9a7718739 100755
--- a/fastdeploy/backends/lite/lite_backend.h
+++ b/fastdeploy/backends/lite/lite_backend.h
@@ -48,12 +48,9 @@ struct LiteBackendOption {
   std::vector<std::string> nnadapter_device_names = {""};
   std::string nnadapter_context_properties = "";
   std::string nnadapter_model_cache_dir = "";
-  std::string nnadapter_model_cache_token = "";
-  std::vector<char> nnadapter_model_cache_buffer = {' '};
   std::map<std::string, std::vector<std::vector<int64_t>>>
       nnadapter_dynamic_shape_info = {{" ", {{0}}}};
   std::string nnadapter_mixed_precision_quantization_config_path = "";
-  std::string nnadapter_mixed_precision_quantization_config_buffer = "";
   bool enable_timvx = false;
   bool enable_cann = false;
 };
diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
index 75767c665..789c0d7d2 100644
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -23,6 +23,7 @@ void BindRuntime(pybind11::module& m) {
       .def("use_gpu", &RuntimeOption::UseGpu)
       .def("use_cpu", &RuntimeOption::UseCpu)
       .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
+      .def("use_cann", &RuntimeOption::UseCANN)
       .def("set_external_stream", &RuntimeOption::SetExternalStream)
       .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
       .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
@@ -32,6 +33,13 @@ void BindRuntime(pybind11::module& m) {
       .def("use_trt_backend", &RuntimeOption::UseTrtBackend)
       .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
       .def("use_lite_backend", &RuntimeOption::UseLiteBackend)
+      .def("set_lite_nnadapter_device_names", &RuntimeOption::SetLiteNNAdapterDeviceNames)
+      .def("set_lite_nnadapter_context_properties", &RuntimeOption::SetLiteNNAdapterContextProperties)
+      .def("set_lite_nnadapter_model_cache_dir", &RuntimeOption::SetLiteNNAdapterModelCacheDir)
+      .def("set_lite_nnadapter_dynamic_shape_info", &RuntimeOption::SetLiteNNAdapterDynamicShapeInfo)
+      .def("set_lite_nnadapter_subgraph_partition_path", &RuntimeOption::SetLiteNNAdapterSubgraphPartitionPath)
+      .def("set_lite_nnadapter_mixed_precision_quantization_config_path", &RuntimeOption::SetLiteNNAdapterMixedPrecisionQuantizationConfigPath)
+      .def("set_lite_nnadapter_subgraph_partition_config_buffer", &RuntimeOption::SetLiteNNAdapterSubgraphPartitionConfigBuffer)
       .def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
       .def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
       .def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc
index 07effdccc..30abd15b3 100644
--- a/fastdeploy/runtime.cc
+++ b/fastdeploy/runtime.cc
@@ -386,12 +386,6 @@ void RuntimeOption::SetLiteNNAdapterModelCacheDir(const std::string& nnadapter_model_cache_dir) {
   lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
 }
 
-void RuntimeOption::SetLiteNNAdapterModelCacheBuffers(
-    const std::string& nnadapter_model_cache_token,
-    const std::vector<char>& nnadapter_model_cache_buffer) {
-  lite_nnadapter_model_cache_token = nnadapter_model_cache_token;
-  lite_nnadapter_model_cache_buffer = nnadapter_model_cache_buffer;
-}
 
 void RuntimeOption::SetLiteNNAdapterDynamicShapeInfo(
     const std::map<std::string, std::vector<std::vector<int64_t>>>&
@@ -404,10 +398,6 @@ void RuntimeOption::SetLiteNNAdapterMixedPrecisionQuantizationConfigPath(
     const std::string& nnadapter_mixed_precision_quantization_config_path) {
   lite_nnadapter_mixed_precision_quantization_config_path = nnadapter_mixed_precision_quantization_config_path;
 }
-void RuntimeOption::SetLiteNNAdapterMixedPrecisionQuantizationConfigBuffer(
-    const std::string& nnadapter_mixed_precision_quantization_config_buffer) {
-  lite_nnadapter_mixed_precision_quantization_config_buffer = nnadapter_mixed_precision_quantization_config_buffer;
-}
 
 void RuntimeOption::SetTrtInputShape(const std::string& input_name,
                                      const std::vector<int32_t>& min_shape,
@@ -833,7 +823,6 @@ void Runtime::CreateLiteBackend() {
   lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir;
   lite_option.nnadapter_dynamic_shape_info = option.lite_nnadapter_dynamic_shape_info;
   lite_option.nnadapter_mixed_precision_quantization_config_path = option.lite_nnadapter_mixed_precision_quantization_config_path;
-  lite_option.nnadapter_mixed_precision_quantization_config_buffer = option.lite_nnadapter_mixed_precision_quantization_config_buffer;
   lite_option.enable_timvx = option.enable_timvx;
   lite_option.enable_cann = option.enable_cann;
   FDASSERT(option.model_format == ModelFormat::PADDLE,
diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h
index 36dcde9cb..d33f5cdce 100644
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -222,13 +222,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   void SetLiteNNAdapterModelCacheDir(
       const std::string& nnadapter_model_cache_dir);
 
-  /**
-   * @brief Set nnadapter model cache buffer for Paddle Lite backend.
-   */
-  void SetLiteNNAdapterModelCacheBuffers(
-      const std::string& nnadapter_model_cache_token,
-      const std::vector<char>& nnadapter_model_cache_buffer);
-
   /**
    * @brief Set nnadapter dynamic shape info for Paddle Lite backend.
    */
@@ -242,12 +235,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   void SetLiteNNAdapterMixedPrecisionQuantizationConfigPath(
       const std::string& nnadapter_mixed_precision_quantization_config_path);
 
-  /**
-   * @brief Set nnadapter mixed precision quantization config buffer for Paddle Lite backend.
-   */
-  void SetLiteNNAdapterMixedPrecisionQuantizationConfigBuffer(
-      const std::string& nnadapter_mixed_precision_quantization_config_buffer);
-
   /**
    * @brief enable half precision while use paddle lite backend
    */
@@ -411,12 +398,9 @@
   std::vector<std::string> lite_nnadapter_device_names = {""};
   std::string lite_nnadapter_context_properties = "";
   std::string lite_nnadapter_model_cache_dir = "";
-  std::string lite_nnadapter_model_cache_token = "";
-  std::vector<char> lite_nnadapter_model_cache_buffer = {' '};
   std::map<std::string, std::vector<std::vector<int64_t>>>
       lite_nnadapter_dynamic_shape_info = {{" ", {{0}}}};
   std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
-  std::string lite_nnadapter_mixed_precision_quantization_config_buffer = "";
   bool enable_timvx = false;
   bool enable_cann = false;
 
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index 6461da66d..3956b25ec 100755
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -255,6 +255,11 @@ class RuntimeOption:
                    rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
         return self._option.use_rknpu2(rknpu2_name, rknpu2_core)
 
+    def use_cann(self):
+        """Inference with Huawei Ascend NPU
+        """
+        return self._option.use_cann()
+
     def set_cpu_thread_num(self, thread_num=-1):
         """Set number of threads if inference with CPU
 
@@ -309,6 +314,49 @@ class RuntimeOption:
         """
         return self.use_lite_backend()
 
+    def set_lite_nnadapter_device_names(self, device_names):
+        """Set nnadapter device names for Paddle Lite backend.
+        """
+        return self._option.set_lite_nnadapter_device_names(device_names)
+
+    def set_lite_nnadapter_context_properties(self, context_properties):
+        """Set nnadapter context properties for Paddle Lite backend.
+        """
+        return self._option.set_lite_nnadapter_context_properties(
+            context_properties)
+
+    def set_lite_nnadapter_model_cache_dir(self, model_cache_dir):
+        """Set nnadapter model cache dir for Paddle Lite backend.
+        """
+        return self._option.set_lite_nnadapter_model_cache_dir(model_cache_dir)
+
+    def set_lite_nnadapter_dynamic_shape_info(self, dynamic_shape_info):
+        """Set nnadapter dynamic shape info for Paddle Lite backend.
+        """
+        return self._option.set_lite_nnadapter_dynamic_shape_info(
+            dynamic_shape_info)
+
+    def set_lite_nnadapter_subgraph_partition_path(self,
+                                                   subgraph_partition_path):
+        """Set nnadapter subgraph partition path for Paddle Lite backend.
+        """
+        return self._option.set_lite_nnadapter_subgraph_partition_path(
+            subgraph_partition_path)
+
+    def set_lite_nnadapter_subgraph_partition_config_buffer(
+            self, subgraph_partition_buffer):
+        """Set nnadapter subgraph partition config buffer for Paddle Lite backend.
+        """
+        return self._option.set_lite_nnadapter_subgraph_partition_config_buffer(
+            subgraph_partition_buffer)
+
+    def set_lite_nnadapter_mixed_precision_quantization_config_path(
+            self, mixed_precision_quantization_config_path):
+        """Set nnadapter mixed precision quantization config path for Paddle Lite backend.
+        """
+        return self._option.set_lite_nnadapter_mixed_precision_quantization_config_path(
+            mixed_precision_quantization_config_path)
+
     def set_paddle_mkldnn(self, use_mkldnn=True):
         """Enable/Disable MKLDNN while using Paddle Inference backend, mkldnn is enabled by default.
         """