diff --git a/examples/vision/classification/paddleclas/ascend/cpp/infer.cc b/examples/vision/classification/paddleclas/ascend/cpp/infer.cc index 5b9be0662..c41b79fc7 100755 --- a/examples/vision/classification/paddleclas/ascend/cpp/infer.cc +++ b/examples/vision/classification/paddleclas/ascend/cpp/infer.cc @@ -26,6 +26,8 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file) { fastdeploy::RuntimeOption option; option.UseCANN(); + option.SetNNAdapterDeviceNames({"huawei_ascend_npu"}); + option.SetNNAdapterContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0"); auto model = fastdeploy::vision::classification::PaddleClasModel( model_file, params_file, config_file, option); diff --git a/fastdeploy/backends/lite/lite_backend.cc b/fastdeploy/backends/lite/lite_backend.cc index 4574130aa..cf7f90ab1 100755 --- a/fastdeploy/backends/lite/lite_backend.cc +++ b/fastdeploy/backends/lite/lite_backend.cc @@ -81,8 +81,8 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) { valid_places.push_back( paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)}); }else if(option_.enable_cann){ - config_.set_nnadapter_device_names({"huawei_ascend_npu"}); - config_.set_nnadapter_context_properties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0"); + config_.set_nnadapter_device_names(option_.nnadapter_device_names); + config_.set_nnadapter_context_properties(option_.nnadapter_context_properties); valid_places.push_back( paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); valid_places.push_back( diff --git a/fastdeploy/backends/lite/lite_backend.h b/fastdeploy/backends/lite/lite_backend.h index 72bb5fc28..b5d0f4b93 100755 --- a/fastdeploy/backends/lite/lite_backend.h +++ b/fastdeploy/backends/lite/lite_backend.h @@ -44,6 +44,16 @@ struct LiteBackendOption { // TODO(qiuyanjun): support more options for lite backend. // Such as fp16, different device target (kARM/kXPU/kNPU/...) 
std::string nnadapter_subgraph_partition_config_path = ""; + std::string nnadapter_subgraph_partition_config_buffer = ""; + std::vector<std::string> nnadapter_device_names = {""}; + std::string nnadapter_context_properties = ""; + std::string nnadapter_model_cache_dir = ""; + std::string nnadapter_model_cache_token = ""; + std::vector<char> nnadapter_model_cache_buffer = {' '}; + std::map<std::string, std::vector<std::vector<int64_t>>> + nnadapter_dynamic_shape_info = {{" ", {{0}}}}; + std::string nnadapter_mixed_precision_quantization_config_path = ""; + std::string nnadapter_mixed_precision_quantization_config_buffer = ""; bool enable_timvx = false; bool enable_cann = false; }; diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc index f2183499d..cf40a8cac 100755 --- a/fastdeploy/runtime.cc +++ b/fastdeploy/runtime.cc @@ -370,6 +370,46 @@ void RuntimeOption::SetLiteSubgraphPartitionPath( lite_nnadapter_subgraph_partition_config_path = nnadapter_subgraph_partition_config_path; } +void RuntimeOption::SetNNAdapterSubgraphPartitionConfigBuffer( + const std::string& nnadapter_subgraph_partition_config_buffer){ + lite_nnadapter_subgraph_partition_config_buffer = nnadapter_subgraph_partition_config_buffer; +} + +void RuntimeOption::SetNNAdapterDeviceNames(const std::vector<std::string>& nnadapter_device_names){ + lite_nnadapter_device_names = nnadapter_device_names; +} + +void RuntimeOption::SetNNAdapterContextProperties(const std::string& nnadapter_context_properties){ + lite_nnadapter_context_properties = nnadapter_context_properties; +} + +void RuntimeOption::SetNNAdapterModelCacheDir(const std::string& nnadapter_model_cache_dir){ + lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir; +} + +void RuntimeOption::SetNNAdapterModelCacheBuffers( + const std::string& nnadapter_model_cache_token, + const std::vector<char>& nnadapter_model_cache_buffer){ + lite_nnadapter_model_cache_token = nnadapter_model_cache_token; + lite_nnadapter_model_cache_buffer = nnadapter_model_cache_buffer; +} + +void RuntimeOption::SetNNAdapterDynamicShapeInfo( + 
const std::map<std::string, std::vector<std::vector<int64_t>>>& + nnadapter_dynamic_shape_info){ + lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info; +} + +void RuntimeOption::SetNNAdapterMixedPrecisionQuantizationConfigPath( + const std::string& nnadapter_mixed_precision_quantization_config_path){ + lite_nnadapter_mixed_precision_quantization_config_path = nnadapter_mixed_precision_quantization_config_path; +} + +void RuntimeOption::SetNNAdapterMixedPrecisionQuantizationConfigBuffer( + const std::string& nnadapter_mixed_precision_quantization_config_buffer){ + lite_nnadapter_mixed_precision_quantization_config_buffer = nnadapter_mixed_precision_quantization_config_buffer; +} + void RuntimeOption::SetTrtInputShape(const std::string& input_name, const std::vector<int32_t>& min_shape, const std::vector<int32_t>& opt_shape, @@ -793,6 +833,13 @@ void Runtime::CreateLiteBackend() { lite_option.power_mode = static_cast<paddle::lite_api::PowerMode>(option.lite_power_mode); lite_option.optimized_model_dir = option.lite_optimized_model_dir; lite_option.nnadapter_subgraph_partition_config_path = option.lite_nnadapter_subgraph_partition_config_path; + lite_option.nnadapter_subgraph_partition_config_buffer = option.lite_nnadapter_subgraph_partition_config_buffer; + lite_option.nnadapter_device_names = option.lite_nnadapter_device_names; + lite_option.nnadapter_context_properties = option.lite_nnadapter_context_properties; + lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir; + lite_option.nnadapter_dynamic_shape_info = option.lite_nnadapter_dynamic_shape_info; + lite_option.nnadapter_mixed_precision_quantization_config_path = option.lite_nnadapter_mixed_precision_quantization_config_path; + lite_option.nnadapter_mixed_precision_quantization_config_buffer = option.lite_nnadapter_mixed_precision_quantization_config_buffer; lite_option.enable_timvx = option.enable_timvx; lite_option.enable_cann = option.enable_cann; FDASSERT(option.model_format == ModelFormat::PADDLE, diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h index 
95fabfeb5..5cafef198 100644 --- a/fastdeploy/runtime.h +++ b/fastdeploy/runtime.h @@ -202,6 +202,55 @@ struct FASTDEPLOY_DECL RuntimeOption { void SetLiteSubgraphPartitionPath( const std::string& nnadapter_subgraph_partition_config_path); + /** + * @brief Set nnadapter subgraph partition config buffer for Paddle Lite backend. + */ + void SetNNAdapterSubgraphPartitionConfigBuffer( + const std::string& nnadapter_subgraph_partition_config_buffer); + + /** + * @brief Set nnadapter device name for Paddle Lite backend. + */ + void SetNNAdapterDeviceNames( + const std::vector<std::string>& nnadapter_device_names); + + /** + * @brief Set nnadapter context properties for Paddle Lite backend. + */ + void SetNNAdapterContextProperties( + const std::string& nnadapter_context_properties); + + /** + * @brief Set nnadapter model cache dir for Paddle Lite backend. + */ + void SetNNAdapterModelCacheDir(const std::string& nnadapter_model_cache_dir); + + /** + * @brief Set nnadapter model cache buffer for Paddle Lite backend. + */ + void SetNNAdapterModelCacheBuffers( + const std::string& nnadapter_model_cache_token, + const std::vector<char>& nnadapter_model_cache_buffer); + + /** + * @brief Set nnadapter dynamic shape info for Paddle Lite backend. + */ + void SetNNAdapterDynamicShapeInfo( + const std::map<std::string, std::vector<std::vector<int64_t>>>& + nnadapter_dynamic_shape_info); + + /** + * @brief Set nnadapter mixed precision quantization config path for Paddle Lite backend. + */ + void SetNNAdapterMixedPrecisionQuantizationConfigPath( + const std::string& nnadapter_mixed_precision_quantization_config_path); + + /** + * @brief Set nnadapter mixed precision quantization config buffer for Paddle Lite backend. 
+ */ + void SetNNAdapterMixedPrecisionQuantizationConfigBuffer( + const std::string& nnadapter_mixed_precision_quantization_config_buffer); + /** * @brief enable half precision while use paddle lite backend */ @@ -355,6 +404,18 @@ struct FASTDEPLOY_DECL RuntimeOption { // optimized model dir for CxxConfig std::string lite_optimized_model_dir = ""; std::string lite_nnadapter_subgraph_partition_config_path = ""; + // and other nnadapter settings for CxxConfig + std::string lite_nnadapter_subgraph_partition_config_buffer = ""; + std::vector<std::string> lite_nnadapter_device_names = {""}; + std::string lite_nnadapter_context_properties = ""; + std::string lite_nnadapter_model_cache_dir = ""; + std::string lite_nnadapter_model_cache_token = ""; + std::vector<char> lite_nnadapter_model_cache_buffer = {' '}; + std::map<std::string, std::vector<std::vector<int64_t>>> + lite_nnadapter_dynamic_shape_info = {{" ", {{0}}}}; + std::string lite_nnadapter_mixed_precision_quantization_config_path = ""; + std::string lite_nnadapter_mixed_precision_quantization_config_buffer = ""; + bool enable_timvx = false; bool enable_cann = false;