diff --git a/CMakeLists.txt b/CMakeLists.txt index 51ba10c83..4f85653f2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -715,6 +715,16 @@ if(WITH_ASCEND) ) endif() +if(WITH_CAPI) + install( + DIRECTORY ${PROJECT_SOURCE_DIR}/c_api/fastdeploy_capi + DESTINATION ${CMAKE_INSTALL_PREFIX}/include + FILES_MATCHING + PATTERN "*.h" + PATTERN "*/types_internal.h" EXCLUDE + ) +endif() + include(${PROJECT_SOURCE_DIR}/cmake/config_cpack.cmake) ############################### Building: FastDeploy Python Wheel ############################# diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index d622660f4..c79001c28 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -33,6 +33,7 @@ set(ORT_DIRECTORY "@ORT_DIRECTORY@") set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@") set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@") set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@) +set(WITH_CAPI @WITH_CAPI@) # Whether to use FastDeploy static lib. The default # value for this option is determined by the SDK # build-time options. @@ -357,6 +358,7 @@ message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}") message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}") message(STATUS "") message(STATUS " WITH_GPU : ${WITH_GPU}") +message(STATUS " WITH_CAPI : ${WITH_CAPI}") message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}") message(STATUS " ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}") @@ -365,6 +367,7 @@ message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}") message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}") + if(ENABLE_PADDLE_BACKEND) message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}") endif() diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc old mode 100644 new mode 100755 index 545474635..5541696d5 --- a/benchmark/cpp/benchmark_ppyolov8.cc +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -12,16 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "flags.h" #include "macros.h" +#include "flags.h" #include "option.h" -#ifdef WIN32 -const char sep = '\\'; -#else -const char sep = '/'; -#endif - int main(int argc, char* argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); auto im = cv::imread(FLAGS_image); @@ -31,6 +25,7 @@ int main(int argc, char* argv[]) { PrintUsage(); return false; } + PrintBenchmarkInfo(); auto model_file = FLAGS_model + sep + "model.pdmodel"; auto params_file = FLAGS_model + sep + "model.pdiparams"; auto config_file = FLAGS_model + sep + "infer_cfg.yml"; diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100644 new mode 100755 index 5b2cab855..6ab3c5990 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "flags.h" #include "macros.h" +#include "flags.h" #include "option.h" int main(int argc, char* argv[]) { @@ -25,6 +25,7 @@ int main(int argc, char* argv[]) { PrintUsage(); return false; } + PrintBenchmarkInfo(); auto model_yolov5 = fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); fastdeploy::vision::DetectionResult res; diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index 7f8c3a29f..fd20e685c 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -16,6 +16,12 @@ #include "gflags/gflags.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + DEFINE_string(model, "", "Directory of the inference model."); DEFINE_string(image, "", "Path of the image file."); DEFINE_string(device, "cpu", @@ -48,3 +54,35 @@ void PrintUsage() { std::cout << "Default value of backend: default" << std::endl; std::cout << "Default value of use_fp16: false" << std::endl; } + +void PrintBenchmarkInfo() { + // Get model name + std::vector model_names; + fastdeploy::benchmark::Split(FLAGS_model, model_names, sep); + // Save benchmark info + std::stringstream ss; + ss.precision(3); + ss << "\n======= Model Info =======\n"; + ss << "model_name: " << model_names[model_names.size() - 1] << std::endl; + ss << "profile_mode: " << FLAGS_profile_mode << std::endl; + if (FLAGS_profile_mode == "runtime") { + ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl; + } + ss << "\n======= Backend Info =======\n"; + ss << "warmup: " << FLAGS_warmup << std::endl; + ss << "repeats: " << FLAGS_repeat << std::endl; + ss << "device: " << FLAGS_device << std::endl; + if (FLAGS_device == "gpu") { + ss << "device_id: " << FLAGS_device_id << std::endl; + } + ss << "backend: " << FLAGS_backend << std::endl; + ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl; + ss << "use_fp16: " << FLAGS_use_fp16 << std::endl; + ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl; + if (FLAGS_collect_memory_info) { + ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval) + << "ms" << std::endl; + } + std::cout << ss.str() << std::endl; + return; +} diff --git a/benchmark/cpp/macros.h b/benchmark/cpp/macros.h index bebd26e0d..77df0c657 100755 --- a/benchmark/cpp/macros.h +++ b/benchmark/cpp/macros.h @@ -18,7 +18,6 @@ #define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \ { \ - std::cout << "====" << #MODEL_NAME << "====" << std::endl; \ if (!MODEL_NAME.Initialized()) { \ std::cerr << "Failed to initialize." << std::endl; \ return 0; \ @@ -62,8 +61,8 @@ float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \ float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \ float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \ - std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \ - std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \ + std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." 
<< std::endl; \ std::cout << "gpu_util: " << __gpu_util__ << std::endl; \ __resource_moniter__.Stop(); \ } \ diff --git a/c_api/CMakeLists.txt b/c_api/CMakeLists.txt index 7c7a16626..4f3934165 100644 --- a/c_api/CMakeLists.txt +++ b/c_api/CMakeLists.txt @@ -19,6 +19,7 @@ if(NOT WITH_CAPI) return() endif() +configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/config.h) file(GLOB_RECURSE DEPLOY_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/*.cc) if(NOT ENABLE_VISION) file(GLOB_RECURSE DEPLOY_VISION_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/vision/*.cc) diff --git a/c_api/fastdeploy_capi/config.h b/c_api/fastdeploy_capi/config.h new file mode 100755 index 000000000..73de04c44 --- /dev/null +++ b/c_api/fastdeploy_capi/config.h @@ -0,0 +1,22 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef ENABLE_VISION +#define ENABLE_VISION +#endif + +#ifndef ENABLE_TEXT +/* #undef ENABLE_TEXT */ +#endif diff --git a/c_api/fastdeploy_capi/config.h.in b/c_api/fastdeploy_capi/config.h.in new file mode 100755 index 000000000..4b5b0137c --- /dev/null +++ b/c_api/fastdeploy_capi/config.h.in @@ -0,0 +1,22 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef ENABLE_VISION +#cmakedefine ENABLE_VISION +#endif + +#ifndef ENABLE_TEXT +#cmakedefine ENABLE_TEXT +#endif diff --git a/c_api/fastdeploy_capi/enum_variables.h b/c_api/fastdeploy_capi/enum_variables.h new file mode 100644 index 000000000..05a3d4c9f --- /dev/null +++ b/c_api/fastdeploy_capi/enum_variables.h @@ -0,0 +1,71 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#define FD_ENUM(type) \
+  typedef int32_t type; \
+  enum
+
+FD_ENUM(FD_C_ModelFormat){
+    AUTOREC,      ///< Auto recognize the model format by model file name
+    PADDLE,       ///< Model with paddlepaddle format
+    ONNX,         ///< Model with ONNX format
+    RKNN,         ///< Model with RKNN format
+    TORCHSCRIPT,  ///< Model with TorchScript format
+    SOPHGO,       ///< Model with SOPHGO format
+};
+
+FD_ENUM(FD_C_rknpu2_CpuName){
+    RK356X = 0, /* run on RK356X. */
+    RK3588 = 1, /* default, run on RK3588. */
+    UNDEFINED,
+};
+
+FD_ENUM(FD_C_rknpu2_CoreMask){
+    RKNN_NPU_CORE_AUTO = 0,  //< default, run on NPU core randomly.
+    RKNN_NPU_CORE_0 = 1,     //< run on NPU core 0.
+    RKNN_NPU_CORE_1 = 2,     //< run on NPU core 1.
+    RKNN_NPU_CORE_2 = 4,     //< run on NPU core 2.
+    RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 |
+                        RKNN_NPU_CORE_1,  //< run on NPU core 0 and core 1.
+    RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 |
+                          RKNN_NPU_CORE_2,  //< run on NPU core 0, core 1 and core 2.
+    RKNN_NPU_CORE_UNDEFINED,
+};
+
+FD_ENUM(FD_C_LitePowerMode){
+    LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
+    LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
+    LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
+    LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
+    LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
+    LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
+};
+
+FD_ENUM(FD_C_ResultType){
+    UNKNOWN_RESULT,
+    CLASSIFY,
+    DETECTION,
+    SEGMENTATION,
+    OCR,
+    MOT,
+    FACE_DETECTION,
+    FACE_ALIGNMENT,
+    FACE_RECOGNITION,
+    MATTING,
+    MASK,
+    KEYPOINT_DETECTION,
+    HEADPOSE,
+};
diff --git a/c_api/fastdeploy_capi/fd_common.h b/c_api/fastdeploy_capi/fd_common.h
index 6374cf9b5..18bfb3c91 100644
--- a/c_api/fastdeploy_capi/fd_common.h
+++ b/c_api/fastdeploy_capi/fd_common.h
@@ -58,43 +58,3 @@ typedef int8_t FD_C_Bool;
 
 #define TRUE 1
 #define FALSE 0
-
-#define FD_ENUM(type) \
-  typedef int32_t type; \
-  enum
-
-FD_ENUM(FD_C_ModelFormat){
-    AUTOREC,      ///< Auto recognize the model format by model file name
-    PADDLE,       ///< Model with paddlepaddle format
-    ONNX,         ///< Model with ONNX format
-    RKNN,         ///< Model with RKNN format
-    TORCHSCRIPT,  ///< Model with TorchScript format
-    SOPHGO,       ///< Model with SOPHGO format
-};
-
-FD_ENUM(FD_C_rknpu2_CpuName){
-    RK356X = 0, /* run on RK356X. */
-    RK3588 = 1, /* default,run on RK3588. */
-    UNDEFINED,
-};
-
-FD_ENUM(FD_C_rknpu2_CoreMask){
-    RKNN_NPU_CORE_AUTO = 0,  //< default, run on NPU core randomly.
-    RKNN_NPU_CORE_0 = 1,     //< run on NPU core 0.
-    RKNN_NPU_CORE_1 = 2,     //< run on NPU core 1.
-    RKNN_NPU_CORE_2 = 4,     //< run on NPU core 2.
-    RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 |
-                        RKNN_NPU_CORE_1,  //< run on NPU core 1 and core 2.
-    RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 |
-                          RKNN_NPU_CORE_2,  //< run on NPU core 1 and core 2.
-    RKNN_NPU_CORE_UNDEFINED,
-};
-
-FD_ENUM(FD_C_LitePowerMode){
-    LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
-    LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
-    LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
-    LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
-    LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
-    LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
-};
diff --git a/c_api/fastdeploy_capi/fd_type.cc b/c_api/fastdeploy_capi/fd_type.cc
new file mode 100644
index 000000000..34390888a
--- /dev/null
+++ b/c_api/fastdeploy_capi/fd_type.cc
@@ -0,0 +1,40 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy_capi/fd_type.h"
+
+#include <opencv2/opencv.hpp>
+
+#include "fastdeploy_capi/fd_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FD_C_Mat FD_C_Imread(const char* imgpath) {
+  cv::Mat image = cv::imread(imgpath);
+  return new cv::Mat(image);
+}
+
+FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img) {
+  cv::Mat cv_img = *(reinterpret_cast<cv::Mat*>(img));
+  bool result = cv::imwrite(savepath, cv_img);
+  return result;
+}
+
+void FD_C_DestroyMat(FD_C_Mat mat) { delete reinterpret_cast<cv::Mat*>(mat); }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/c_api/fastdeploy_capi/fd_type.h b/c_api/fastdeploy_capi/fd_type.h
index 75daf9db6..4a3d8cadd 100644
--- a/c_api/fastdeploy_capi/fd_type.h
+++ b/c_api/fastdeploy_capi/fd_type.h
@@ -17,7 +17,8 @@
 #include
 #include
 
-#include "fastdeploy_capi/fd_common.h"  // NOLINT
+#include "fastdeploy_capi/enum_variables.h"
+#include "fastdeploy_capi/fd_common.h"
 
 typedef struct FD_C_OneDimArrayUint8 {
   size_t size;
@@ -65,3 +66,19 @@
 } FD_C_TwoDimArrayFloat;  // std::vector<std::vector<float>>
 
 typedef void* FD_C_Mat;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat
+FD_C_Imread(const char* imgpath);
+
+FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_Imwrite(const char* savepath,
+                                                     __fd_keep FD_C_Mat);
+
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyMat(__fd_take FD_C_Mat mat);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/c_api/fastdeploy_capi/runtime_option.cc b/c_api/fastdeploy_capi/runtime_option.cc
index 3c9b4022d..4683d468d 100644
--- a/c_api/fastdeploy_capi/runtime_option.cc
+++ b/c_api/fastdeploy_capi/runtime_option.cc
@@ -17,7 +17,9 @@
 #include "fastdeploy/utils/utils.h"
 #include "fastdeploy_capi/types_internal.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() {
   FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper =
@@ -28,7 +30,7 @@ FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() {
   return fd_c_runtime_option_wrapper;
 }
 
-void FD_C_DestroyRuntimeOption(
+void FD_C_DestroyRuntimeOptionWrapper(
     __fd_take FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
   delete fd_c_runtime_option_wrapper;
 }
@@ -404,15 +406,6 @@ void FD_C_RuntimeOptionWrapperUseIpu(
                          batches_per_step);
 }
 
-void FD_C_RuntimeOptionWrapperSetIpuConfig(
-    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
-    FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion,
-    FD_C_Bool enable_half_partial) {
-  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
-                                                   fd_c_runtime_option_wrapper);
-  runtime_option->SetIpuConfig(enable_fp16, replica_num,
-                               available_memory_proportion,
-                               enable_half_partial);
+#ifdef __cplusplus
 }
-
-}  // extern "C"
+#endif
diff --git a/c_api/fastdeploy_capi/runtime_option.h b/c_api/fastdeploy_capi/runtime_option.h
index cfc087473..40d220bbe 100644
--- a/c_api/fastdeploy_capi/runtime_option.h
+++
b/c_api/fastdeploy_capi/runtime_option.h @@ -14,9 +14,7 @@ #pragma once -#include - -#include "fastdeploy_capi/fd_common.h" +#include "fastdeploy_capi/fd_type.h" typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper; @@ -499,19 +497,6 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseIpu( int device_num, int micro_batch_size, FD_C_Bool enable_pipelining, int batches_per_step); -/** \brief Set IPU config. - * - * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object - * \param[in] enable_fp16 enable fp16. - * \param[in] replica_num the number of graph replication. - * \param[in] available_memory_proportion the available memory proportion for matmul/conv. - * \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16. - */ -FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetIpuConfig( - __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, - FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion, - FD_C_Bool enable_half_partial); - #ifdef __cplusplus } // extern "C" #endif diff --git a/c_api/fastdeploy_capi/vision.h b/c_api/fastdeploy_capi/vision.h new file mode 100644 index 000000000..a27a6c8dd --- /dev/null +++ b/c_api/fastdeploy_capi/vision.h @@ -0,0 +1,26 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "fastdeploy_capi/config.h" + +#ifdef ENABLE_VISION +#include "fastdeploy_capi/vision/classification/ppcls/model.h" +#include "fastdeploy_capi/vision/detection/ppdet/model.h" +#include "fastdeploy_capi/vision/result.h" +#include "fastdeploy_capi/vision/visualize.h" +#endif + +#include "fastdeploy_capi/fd_type.h" +#include "fastdeploy_capi/runtime_option.h" diff --git a/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc index 3ed62f26a..ad0028612 100644 --- a/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc +++ b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc @@ -16,7 +16,9 @@ #include "fastdeploy_capi/types_internal.h" +#ifdef __cplusplus extern "C" { +#endif FD_C_PaddleClasModelWrapper* FD_C_CreatePaddleClasModelWrapper( const char* model_file, const char* params_file, const char* config_file, @@ -50,4 +52,7 @@ FD_C_Bool FD_C_PaddleClasModelWrapperPredict( ClassifyResultWrapper, fd_c_classify_result_wrapper); return paddleclas_model->Predict(im, classify_result.get()); } -} \ No newline at end of file + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc index 17a87ec8b..e7055185f 100644 --- a/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc +++ b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc @@ -17,7 +17,9 @@ #include "fastdeploy_capi/types_internal.h" #include "fastdeploy_capi/vision/visualize.h" +#ifdef __cplusplus extern "C" { +#endif FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper( const char* model_file, const char* params_file, const char* config_file, @@ -50,4 +52,7 @@ FD_C_Bool FD_C_PPYOLOEWrapperPredict( DetectionResultWrapper, fd_c_detection_result_wrapper); return ppyoloe_model->Predict(im, detection_result.get()); } -} \ No newline at end of file + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/c_api/fastdeploy_capi/vision/result.cc b/c_api/fastdeploy_capi/vision/result.cc index abf52ba69..207ef3950 100644 --- a/c_api/fastdeploy_capi/vision/result.cc +++ b/c_api/fastdeploy_capi/vision/result.cc @@ -17,7 +17,9 @@ #include "fastdeploy/utils/utils.h" #include "fastdeploy_capi/types_internal.h" +#ifdef __cplusplus extern "C" { +#endif // Classification Results @@ -235,4 +237,6 @@ FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData( return fd_c_detection_result_wrapper; } -} \ No newline at end of file +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/c_api/fastdeploy_capi/vision/result.h b/c_api/fastdeploy_capi/vision/result.h index 9d32052d9..247ec50f6 100644 --- a/c_api/fastdeploy_capi/vision/result.h +++ b/c_api/fastdeploy_capi/vision/result.h @@ -24,22 +24,6 @@ typedef struct FD_C_DetectionResultWrapper FD_C_DetectionResultWrapper; extern "C" { #endif -FD_ENUM(FD_C_ResultType){ - UNKNOWN_RESULT, - CLASSIFY, - DETECTION, - SEGMENTATION, - OCR, - MOT, - FACE_DETECTION, - FACE_ALIGNMENT, - FACE_RECOGNITION, - MATTING, - MASK, - KEYPOINT_DETECTION, - HEADPOSE, -}; - typedef struct FD_C_ClassifyResult { FD_C_OneDimArrayInt32 label_ids; FD_C_OneDimArrayFloat scores; diff --git a/c_api/fastdeploy_capi/vision/visualize.cc b/c_api/fastdeploy_capi/vision/visualize.cc index 9132fe606..6045270bd 100644 --- a/c_api/fastdeploy_capi/vision/visualize.cc +++ b/c_api/fastdeploy_capi/vision/visualize.cc @@ -17,7 +17,9 @@ #include "fastdeploy/vision/visualize/visualize.h" 
#include "fastdeploy_capi/types_internal.h" +#ifdef __cplusplus extern "C" { +#endif FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_c_detection_result, @@ -32,4 +34,6 @@ FD_C_Mat FD_C_VisDetection(FD_C_Mat im, line_size, font_size); return new cv::Mat(result); } -} \ No newline at end of file +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/examples/vision/detection/paddledetection/c/CMakeLists.txt b/examples/vision/detection/paddledetection/c/CMakeLists.txt new file mode 100644 index 000000000..12a8d9a2c --- /dev/null +++ b/examples/vision/detection/paddledetection/c/CMakeLists.txt @@ -0,0 +1,13 @@ +PROJECT(infer_demo C) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe.c) +target_link_libraries(infer_ppyoloe_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/detection/paddledetection/c/README.md b/examples/vision/detection/paddledetection/c/README.md new file mode 100644 index 000000000..79a33e51a --- /dev/null +++ b/examples/vision/detection/paddledetection/c/README.md @@ -0,0 +1,200 @@ +English | [简体中文](README_CN.md) +# PaddleDetection C Deployment Example + +This directory provides examples that `infer_xxx.c` fast finishes the deployment of PaddleDetection models, including PPYOLOE on CPU/GPU. + +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) +- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +Taking inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.4 or above (x.x.x>=1.0.4) is required to support this model. + +```bash +ppyoloe is taken as an example for inference deployment + +mkdir build +cd build +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# Download the PPYOLOE model file and test images +wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz +wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg +tar xvf ppyoloe_crn_l_300e_coco.tgz + + +# CPU inference +./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0 +# GPU inference +./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1 +``` + +The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: +- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md) + +## PaddleDetection C Interface + +### RuntimeOption + +```c +FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() +``` + +> Create a RuntimeOption object, and return a pointer to manipulate it. 
+>
+> **Return**
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate RuntimeOption object.
+
+
+```c
+void FD_C_RuntimeOptionWrapperUseCpu(
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper)
+```
+
+> Enable CPU inference.
+>
+> **Params**
+>
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate RuntimeOption object.
+
+```c
+void FD_C_RuntimeOptionWrapperUseGpu(
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    int gpu_id)
+```
+> Enable GPU inference.
+>
+> **Params**
+>
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate RuntimeOption object.
+> * **gpu_id**(int): GPU id
+
+
+### Model
+
+```c
+
+FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
+    const char* model_file, const char* params_file, const char* config_file,
+    FD_C_RuntimeOptionWrapper* runtime_option,
+    const FD_C_ModelFormat model_format)
+
+```
+
+> Create a PPYOLOE model object, and return a pointer to manipulate it.
+>
+> **Params**
+>
+> * **model_file**(const char*): Model file path
+> * **params_file**(const char*): Parameter file path
+> * **config_file**(const char*): Configuration file path, which is the deployment yaml file exported by PaddleDetection
+> * **runtime_option**(FD_C_RuntimeOptionWrapper*): Backend inference configuration
+> * **model_format**(FD_C_ModelFormat): Model format, e.g. PADDLE for models in Paddle format
+>
+> **Return**
+> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): Pointer to manipulate PPYOLOE object.
+
+
+#### Read and write image
+
+```c
+FD_C_Mat FD_C_Imread(const char* imgpath)
+```
+
+> Read an image, and return a pointer to cv::Mat.
+>
+> **Params**
+>
+> * **imgpath**(const char*): image path
+>
+> **Return**
+>
+> * **imgmat**(FD_C_Mat): pointer to cv::Mat object which holds the image.
+
+
+```c
+FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img);
+```
+
+> Write image to a file.
+>
+> **Params**
+>
+> * **savepath**(const char*): save path
+> * **img**(FD_C_Mat): pointer to cv::Mat object
+>
+> **Return**
+>
+> * **result**(FD_C_Bool): bool to indicate success or failure
+
+
+#### Prediction
+
+```c
+FD_C_Bool FD_C_PPYOLOEWrapperPredict(
+    __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
+```
+>
+> Predict an image, and generate the detection result.
+>
+> **Params**
+> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): pointer to manipulate PPYOLOE object
+> * **img**(FD_C_Mat): pointer to cv::Mat object, which can be obtained by the FD_C_Imread interface
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): Detection result, including the detection box and the confidence of each box. Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for DetectionResult
+
+
+#### Result
+
+```c
+FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper();
+```
+>
+> Create a DetectionResult object to keep the detection result, and return a pointer to manipulate it.
+>
+> **Return**
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to manipulate DetectionResult object
+
+
+
+```c
+FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
+```
+>
+> Get the C DetectionResult structure from FD_C_DetectionResultWrapper, so that the fields can be accessed directly.
+>
+> **Params**
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to manipulate DetectionResult object
+>
+> **Return**
+> * **fd_c_detection_result**(FD_C_DetectionResult*): pointer to C DetectionResult structure
+
+
+
+```c
+FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result,
+                  float score_threshold, int line_size, float font_size);
+```
+>
+> Visualize detection results and return the visualized image.
+>
+> **Params**
+> * **im**(FD_C_Mat): pointer to the input image
+> * **fd_detection_result**(FD_C_DetectionResult*): pointer to the C DetectionResult structure
+> * **score_threshold**(float): score threshold
+> * **line_size**(int): line size
+> * **font_size**(float): font size
+>
+> **Return**
+> * **vis_im**(FD_C_Mat): pointer to the visualized image.
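+
+Putting these interfaces together, the snippet below is a condensed sketch of a CPU inference flow; see `infer_ppyoloe.c` in this directory for the complete runnable demo, including GPU inference:
+
+```c
+#include "fastdeploy_capi/vision.h"
+
+int main() {
+  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
+  FD_C_RuntimeOptionWrapperUseCpu(option);
+  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
+      "ppyoloe_crn_l_300e_coco/model.pdmodel",
+      "ppyoloe_crn_l_300e_coco/model.pdiparams",
+      "ppyoloe_crn_l_300e_coco/infer_cfg.yml", option, PADDLE);
+
+  // Predict, then visualize and save the result.
+  FD_C_Mat im = FD_C_Imread("000000014439.jpg");
+  FD_C_DetectionResultWrapper* result_wrapper =
+      FD_C_CreateDetectionResultWrapper();
+  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
+    return -1;
+  }
+  FD_C_DetectionResult* result =
+      FD_C_DetectionResultWrapperGetData(result_wrapper);
+  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);
+  FD_C_Imwrite("vis_result.jpg", vis_im);
+
+  // Release the wrappers and images.
+  FD_C_DestroyRuntimeOptionWrapper(option);
+  FD_C_DestroyPPYOLOEWrapper(model);
+  FD_C_DestroyDetectionResultWrapper(result_wrapper);
+  FD_C_DestroyDetectionResult(result);
+  FD_C_DestroyMat(im);
+  FD_C_DestroyMat(vis_im);
+  return 0;
+}
+```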
+
+
+- [Model Description](../../)
+- [Python Deployment](../python)
+- [Vision Model Prediction Results](../../../../../docs/api/vision_results/)
+- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md)
diff --git a/examples/vision/detection/paddledetection/c/README_CN.md b/examples/vision/detection/paddledetection/c/README_CN.md
new file mode 100644
index 000000000..22cbd311c
--- /dev/null
+++ b/examples/vision/detection/paddledetection/c/README_CN.md
@@ -0,0 +1,204 @@
+[English](README.md) | 简体中文
+# PaddleDetection C 部署示例
+
+本目录下提供`infer_xxx.c`来调用C API快速完成PaddleDetection模型PPYOLOE在CPU/GPU上部署的示例。
+
+在部署前,需确认以下两个步骤:
+
+- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+```bash
+# 以ppyoloe为例进行推理部署
+
+mkdir build
+cd build
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# 下载PPYOLOE模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+
+# CPU推理
+./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0
+# GPU推理
+./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
+```
+
+以上命令只适用于Linux或MacOS,Windows下SDK的使用方式请参考:
+- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md)
+
+如果用户使用华为昇腾NPU部署,请参考以下方式在部署前初始化部署环境:
+- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md)
+
+## PaddleDetection C API接口
+
+### 配置
+
+```c
+FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper()
+```
+
+> 创建一个RuntimeOption的配置对象,并且返回操作它的指针。
+>
+> **返回**
+>
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption对象的指针
+
+
+```c
+void FD_C_RuntimeOptionWrapperUseCpu(
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper)
+```
+
+> 开启CPU推理
+>
+> **参数**
+>
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption对象的指针
+
+```c
+void FD_C_RuntimeOptionWrapperUseGpu(
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    int gpu_id)
+```
+> 开启GPU推理
+>
+> **参数**
+>
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption对象的指针
+> * **gpu_id**(int): 显卡号
+
+
+### 模型
+
+```c
+
+FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
+    const char* model_file, const char* params_file, const char* config_file,
+    FD_C_RuntimeOptionWrapper* runtime_option,
+    const FD_C_ModelFormat model_format)
+
+```
+
+> 创建一个PPYOLOE的模型,并且返回操作它的指针。
+>
+> **参数**
+>
+> * **model_file**(const char*): 模型文件路径
+> * **params_file**(const char*): 参数文件路径
+> * **config_file**(const char*): 配置文件路径,即PaddleDetection导出的部署yaml文件
+> * **runtime_option**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption的指针,表示后端推理配置
+> * **model_format**(FD_C_ModelFormat): 模型格式
+>
+> **返回**
+> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): 指向PPYOLOE模型对象的指针
+
+
+#### 读写图像
+
+```c
+FD_C_Mat FD_C_Imread(const char* imgpath)
+```
+
+> 读取一个图像,并且返回cv::Mat的指针。
+>
+> **参数**
+>
+> * **imgpath**(const char*): 图像文件路径
+>
+> **返回**
+>
+> * **imgmat**(FD_C_Mat): 指向图像数据cv::Mat的指针。
+
+
+```c
+FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img);
+```
+
+> 将图像写入文件中。
+>
+> **参数**
+>
+> * **savepath**(const char*): 保存图像的路径
+> * **img**(FD_C_Mat): 指向图像数据的指针
+>
+> **返回**
+>
+> * **result**(FD_C_Bool): 表示操作是否成功
+
+
+#### Predict函数
+
+```c
+FD_C_Bool FD_C_PPYOLOEWrapperPredict(
+    __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
+```
+>
+> 模型预测接口,输入图像并直接生成检测结果。
+>
+> **参数**
+> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): 指向PPYOLOE模型的指针
+> * **img**(FD_C_Mat): 输入图像的指针,指向cv::Mat对象,可以调用FD_C_Imread读取图像获取
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): 指向检测结果的指针,检测结果包括检测框,各个框的置信度,DetectionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/)
+
+
+#### Predict结果
+
+```c
+FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper();
+```
+>
+> 创建一个DetectionResult对象,用来保存推理的结果,并返回所创建的DetectionResult对象的指针。
+>
+> **返回**
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): 指向DetectionResult对象的指针
+
+
+
+```c
+FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
+```
+>
+> 从DetectionResult对象中提取纯C结构的DetectionResult结果,并返回结构指针,通过该指针可直接访问结构中的字段。
+>
+> **参数**
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): 指向DetectionResult对象的指针
+>
+> **返回**
+> * **fd_c_detection_result**(FD_C_DetectionResult*): 指向纯C结构的DetectionResult的指针
+
+
+
+```c
+FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result,
+                  float score_threshold, int line_size, float font_size);
+```
+>
+> 对检测结果进行可视化,返回可视化的图像。
+>
+> **参数**
+> * **im**(FD_C_Mat): 指向输入图像的指针
+> * **fd_detection_result**(FD_C_DetectionResult*): 指向纯C结构DetectionResult的指针
+> * **score_threshold**(float): 检测阈值
+> * **line_size**(int): 检测框线大小
+> * **font_size**(float): 检测框字体大小
+>
+> **返回**
+> * **vis_im**(FD_C_Mat): 指向可视化图像的指针
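+
+将上述接口串联起来,下面给出一个精简的CPU推理示例梗概;完整可运行的示例(含GPU推理)请参考本目录下的`infer_ppyoloe.c`:
+
+```c
+#include "fastdeploy_capi/vision.h"
+
+int main() {
+  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
+  FD_C_RuntimeOptionWrapperUseCpu(option);
+  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
+      "ppyoloe_crn_l_300e_coco/model.pdmodel",
+      "ppyoloe_crn_l_300e_coco/model.pdiparams",
+      "ppyoloe_crn_l_300e_coco/infer_cfg.yml", option, PADDLE);
+
+  // 预测,然后可视化并保存结果
+  FD_C_Mat im = FD_C_Imread("000000014439.jpg");
+  FD_C_DetectionResultWrapper* result_wrapper =
+      FD_C_CreateDetectionResultWrapper();
+  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
+    return -1;
+  }
+  FD_C_DetectionResult* result =
+      FD_C_DetectionResultWrapperGetData(result_wrapper);
+  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);
+  FD_C_Imwrite("vis_result.jpg", vis_im);
+
+  // 释放资源
+  FD_C_DestroyRuntimeOptionWrapper(option);
+  FD_C_DestroyPPYOLOEWrapper(model);
+  FD_C_DestroyDetectionResultWrapper(result_wrapper);
+  FD_C_DestroyDetectionResult(result);
+  FD_C_DestroyMat(im);
+  FD_C_DestroyMat(vis_im);
+  return 0;
+}
+```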
+
+
+- [模型介绍](../../)
+- [Python部署](../python)
+- [视觉模型预测结果](../../../../../docs/api/vision_results/)
+- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md)
diff --git a/examples/vision/detection/paddledetection/c/infer_ppyoloe.c b/examples/vision/detection/paddledetection/c/infer_ppyoloe.c
new file mode 100644
index 000000000..6b9cc4369
--- /dev/null
+++ b/examples/vision/detection/paddledetection/c/infer_ppyoloe.c
@@ -0,0 +1,124 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "fastdeploy_capi/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const char* model_dir, const char* image_file) {
+  char model_file[100];
+  char params_file[100];
+  char config_file[100];
+  int max_size = 99;
+  snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
+  snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
+  snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "infer_cfg.yml");
+
+  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
+  FD_C_RuntimeOptionWrapperUseCpu(option);
+
+  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
+      model_file, params_file, config_file, option, PADDLE);
+
+  FD_C_Mat im = FD_C_Imread(image_file);
+
+  FD_C_DetectionResultWrapper* result_wrapper =
+      FD_C_CreateDetectionResultWrapper();
+
+  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
+    printf("Failed to predict.\n");
+    return;
+  }
+
+  FD_C_DetectionResult* result =
+      FD_C_DetectionResultWrapperGetData(result_wrapper);
+  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);
+
+  FD_C_Imwrite("vis_result.jpg", vis_im);
+  printf("Visualized result saved in ./vis_result.jpg\n");
+
+  FD_C_DestroyRuntimeOptionWrapper(option);
+  FD_C_DestroyPPYOLOEWrapper(model);
+  FD_C_DestroyDetectionResultWrapper(result_wrapper);
+  FD_C_DestroyDetectionResult(result);
+  FD_C_DestroyMat(im);
+  FD_C_DestroyMat(vis_im);
+}
+
+void GpuInfer(const char* model_dir, const char* image_file) {
+  char model_file[100];
+  char params_file[100];
+  char config_file[100];
+  int max_size = 99;
+  snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
+  snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
+  snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "infer_cfg.yml");
+
+  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
+  FD_C_RuntimeOptionWrapperUseGpu(option, 0);
+
+  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
+      model_file, params_file, config_file, option, PADDLE);
+
+  FD_C_Mat im = FD_C_Imread(image_file);
+
+  FD_C_DetectionResultWrapper* result_wrapper =
+      FD_C_CreateDetectionResultWrapper();
+
+  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
+    printf("Failed to predict.\n");
+    return;
+  }
+
+  FD_C_DetectionResult* result =
+      FD_C_DetectionResultWrapperGetData(result_wrapper);
+  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);
+
+  FD_C_Imwrite("vis_result.jpg", vis_im);
+  printf("Visualized result saved in ./vis_result.jpg\n");
+
+  FD_C_DestroyRuntimeOptionWrapper(option);
+  FD_C_DestroyPPYOLOEWrapper(model);
+  FD_C_DestroyDetectionResultWrapper(result_wrapper);
+  FD_C_DestroyDetectionResult(result);
+  FD_C_DestroyMat(im);
+  FD_C_DestroyMat(vis_im);
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 4) {
+    printf(
+        "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+        "e.g. ./infer_model ./ppyoloe_model_dir ./test.jpeg 0"
+        "\n");
+    printf(
+        "The data type of run_option is int, 0: run with cpu; 1: run with gpu"
+        "\n");
+    return -1;
+  }
+
+  if (atoi(argv[3]) == 0) {
+    CpuInfer(argv[1], argv[2]);
+  } else if (atoi(argv[3]) == 1) {
+    GpuInfer(argv[1], argv[2]);
+  }
+  return 0;
+}
diff --git a/examples/vision/detection/paddledetection/rknpu2/README_CN.md b/examples/vision/detection/paddledetection/rknpu2/README_CN.md
index 8fb3765ba..6a932fe3a 100644
--- a/examples/vision/detection/paddledetection/rknpu2/README_CN.md
+++ b/examples/vision/detection/paddledetection/rknpu2/README_CN.md
@@ -4,12 +4,14 @@
 
 ## 支持模型列表
 
-目前FastDeploy使用RKNPU2支持如下PaddleDetection模型的部署:
+在RKNPU2上已经通过测试的PaddleDetection模型如下:
 
 - Picodet
-- PPYOLOE
+- PPYOLOE(int8)
 - YOLOV8
 
+如果你需要查看详细的速度信息,请查看[RKNPU2模型速度一览表](../../../../../docs/cn/faq/rknpu2/rknpu2.md)
+
 ## 准备PaddleDetection部署模型以及转换模型
 
 RKNPU部署模型前需要将Paddle模型转换成RKNN模型,具体步骤如下:
@@ -20,8 +22,79 @@
 
 ## 模型转换example
 
-- [Picodet RKNPU2模型转换文档](./picodet.md)
-- [YOLOv8 RKNPU2模型转换文档](./yolov8.md)
+### 注意点
+
+PaddleDetection模型在RKNPU2上部署时要注意以下几点:
+
+* 模型导出需要包含Decode
+* 由于RKNPU2不支持NMS,因此输出节点必须裁剪至NMS之前
+* 由于RKNPU2 Div算子的限制,模型的输出节点需要裁剪至Div算子之前
+
+### Paddle模型转换为ONNX模型
+
+由于Rockchip提供的rknn-toolkit2工具暂时不支持Paddle模型直接导出为RKNN模型,因此需要先将Paddle模型导出为ONNX模型,再将ONNX模型转为RKNN模型。
+
+```bash
+# 以Picodet为例
+# 下载Paddle静态图模型并解压
+wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
+tar xvf picodet_s_416_coco_lcnet.tar
+
+# 静态图转ONNX模型,注意,这里的save_file请和压缩包名对齐
+paddle2onnx --model_dir picodet_s_416_coco_lcnet \
+            --model_filename model.pdmodel \
+            --params_filename model.pdiparams \
+            --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+            --enable_dev_version True
+
+# 固定shape
+python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+                               --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+                               --input_shape_dict "{'image':[1,3,416,416]}"
+```
+
+### 编写yaml文件
+
+**修改normalize参数**
+
+如果你需要在NPU上执行normalize操作,请根据你的模型配置normalize参数,例如:
+
+```yaml
+mean:
+  -
+    - 123.675
+    - 116.28
+    - 103.53
+std:
+  -
+    - 58.395
+    - 57.12
+    - 57.375
+```
+
+**修改outputs参数**
+
+由于Paddle2ONNX版本的不同,转换模型的输出节点名称也有所不同,请使用[Netron](https://netron.app)对模型进行可视化,并找到以下蓝色方框标记的NonMaxSuppression节点,红色方框的节点名称即为目标名称。
+
+例如,使用Netron可视化后,得到以下图片:
+
+![](https://user-images.githubusercontent.com/58363586/212599781-e1952da7-6eae-4951-8ca7-bab7e6940692.png)
+
+找到蓝色方框标记的NonMaxSuppression节点,可以看到红色方框标记的两个节点名称为p2o.Div.79和p2o.Concat.9;由于输出节点需要裁剪至Div算子之前,实际配置时选用的是Div之前的节点,修改后的outputs参数如下:
+
+```yaml
+outputs_nodes:
+  - 'p2o.Mul.179'
+  - 'p2o.Concat.9'
+```
+
+### ONNX模型转RKNN模型
+
+为了方便大家使用,我们提供了python脚本,通过我们预配置的config文件,你将能够快速地转换ONNX模型到RKNN模型
+
+```bash
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \
+                              --target_platform rk3588
+```
 
 ## 其他链接
diff --git a/examples/vision/detection/paddledetection/rknpu2/picodet.md b/examples/vision/detection/paddledetection/rknpu2/picodet.md
deleted file mode 100644
index de0192648..000000000
--- a/examples/vision/detection/paddledetection/rknpu2/picodet.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Picodet RKNPU2模型转换文档
-
-以下步骤均在Ubuntu电脑上完成,请参考配置文档完成转换模型环境配置。下面以Picodet-s为例子,教大家如何转换PaddleDetection模型到RKNN模型。
-
-
-### 导出ONNX模型
-
-```bash
-# 下载Paddle静态图模型并解压
-wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
-tar xvf picodet_s_416_coco_lcnet.tar
-
-# 静态图转ONNX模型,注意,这里的save_file请和压缩包名对齐
-paddle2onnx --model_dir picodet_s_416_coco_lcnet \
-            --model_filename model.pdmodel \
-            --params_filename model.pdiparams \
-            --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
-            --enable_dev_version True
-
-# 固定shape
-python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
-                               --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
-                               --input_shape_dict "{'image':[1,3,416,416]}"
-```
-
-### 编写模型导出配置文件
-
-以转化RK3568的RKNN模型为例子,我们需要编辑tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml,来转换ONNX模型到RKNN模型。 - -**修改normalize参数** - -如果你需要在NPU上执行normalize操作,请根据你的模型配置normalize参数,例如: - -```yaml -mean: - - - - 127.5 - - 127.5 - - 127.5 -std: - - - - 127.5 - - 127.5 - - 127.5 -``` - -**修改outputs参数** -由于Paddle2ONNX版本的不同,转换模型的输出节点名称也有所不同,请使用[Netron](https://netron.app)对模型进行可视化,并找到以下蓝色方框标记的NonMaxSuppression节点,红色方框的节点名称即为目标名称。 - -例如,使用Netron可视化后,得到以下图片: - -![](https://user-images.githubusercontent.com/58363586/212599781-e1952da7-6eae-4951-8ca7-bab7e6940692.png) - -找到蓝色方框标记的NonMaxSuppression节点,可以看到红色方框标记的两个节点名称为p2o.Div.79和p2o.Concat.9,因此需要修改outputs参数,修改后如下: - -```yaml -outputs_nodes: [ 'p2o.Div.79','p2o.Concat.9' ] -``` - -### 转换模型 - -```bash - -# ONNX模型转RKNN模型 -# 转换模型,模型将生成在picodet_s_320_coco_lcnet_non_postprocess目录下 -python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \ - --target_platform rk3588 -``` diff --git a/examples/vision/detection/paddledetection/rknpu2/python/infer.py b/examples/vision/detection/paddledetection/rknpu2/python/infer.py index a3c146531..2dfb54281 100644 --- a/examples/vision/detection/paddledetection/rknpu2/python/infer.py +++ b/examples/vision/detection/paddledetection/rknpu2/python/infer.py @@ -45,15 +45,16 @@ if __name__ == "__main__": # 配置runtime,加载模型 runtime_option = fd.RuntimeOption() - runtime_option.use_cpu() + runtime_option.use_rknpu2() model = fd.vision.detection.PPYOLOE( model_file, params_file, config_file, runtime_option=runtime_option, - model_format=fd.ModelFormat.ONNX) - + model_format=fd.ModelFormat.RKNN) + model.preprocessor.disable_normalize() + model.preprocessor.disable_permute() model.postprocessor.apply_decode_and_nms() # 预测图片分割结果 diff --git a/examples/vision/detection/paddledetection/rknpu2/yolov8.md b/examples/vision/detection/paddledetection/rknpu2/yolov8.md deleted file mode 100644 index 432fe02bb..000000000 --- a/examples/vision/detection/paddledetection/rknpu2/yolov8.md +++ /dev/null @@ -1,50 +0,0 @@ -# YOLOv8 RKNPU2模型转换文档 - -以下步骤均在Ubuntu电脑上完成,请参考配置文档完成转换模型环境配置。下面以yolov8为例子,教大家如何转换PaddleDetection模型到RKNN模型。 - - -### 导出ONNX模型 - -```bash -# 下载Paddle静态图模型并解压 - -# 静态图转ONNX模型,注意,这里的save_file请和压缩包名对齐 -paddle2onnx --model_dir yolov8_n_500e_coco \ - --model_filename model.pdmodel \ - --params_filename model.pdiparams \ - --save_file yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \ - --enable_dev_version True - -# 固定shape -python -m paddle2onnx.optimize --input_model yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \ - --output_model yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \ - --input_shape_dict "{'image':[1,3,640,640],'scale_factor':[1,2]}" -``` - -### 编写模型导出配置文件 -**修改outputs参数** -由于Paddle2ONNX版本的不同,转换模型的输出节点名称也有所不同,请使用[Netron](https://netron.app)对模型进行可视化,并找到以下蓝色方框标记的NonMaxSuppression节点,红色方框的节点名称即为目标名称。 - -例如,使用Netron可视化后,得到以下图片: - -![](https://user-images.githubusercontent.com/58363586/212599658-8a2c4b79-f59a-40b5-ade7-f77c6fcfdf2a.png) - -找到蓝色方框标记的NonMaxSuppression节点,可以看到红色方框标记的两个节点名称为p2o.Div.1和p2o.Concat.9,因此需要修改outputs参数,修改后如下: - -```yaml -outputs_nodes: [ 'p2o.Div.1','p2o.Concat.49' ] -``` - -### 转换模型 - -```bash - -# ONNX模型转RKNN模型 -# 转换非全量化模型,模型将生成在yolov8_n目录下 -python tools/rknpu2/export.py --config_path tools/rknpu2/config/yolov8_n_unquantized.yaml \ - --target_platform rk3588 - -# 转换全量化模型,模型将生成在yolov8_n目录下 -python tools/rknpu2/export.py --config_path tools/rknpu2/config/yolov8_n_quantized.yaml \ - --target_platform rk3588 -``` diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc 
index a33db1dc2..8857f10c4 100755
--- a/fastdeploy/benchmark/utils.cc
+++ b/fastdeploy/benchmark/utils.cc
@@ -13,8 +13,8 @@
 // limitations under the License.
 
 #include
-#if defined(__linux__) || defined(__ANDROID__)
-#include
+#ifdef __linux__
+#include <sys/resource.h>
 #endif
 #include
 
@@ -23,8 +23,7 @@
 namespace fastdeploy {
 namespace benchmark {
 
-// Remove the ch characters at both ends of str
-static std::string strip(const std::string& str, char ch = ' ') {
+std::string Strip(const std::string& str, char ch) {
   int i = 0;
   while (str[i] == ch) {
     i++;
@@ -36,9 +35,8 @@
   return str.substr(i, j + 1 - i);
 }
 
-// Split string
-static void split(const std::string& s, std::vector<std::string>& tokens,
-                  char delim = ' ') {
+void Split(const std::string& s, std::vector<std::string>& tokens,
+           char delim) {
   tokens.clear();
   size_t lastPos = s.find_first_not_of(delim, 0);
   size_t pos = s.find(delim, lastPos);
@@ -54,7 +52,7 @@
 ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
     : is_supported_(false),
       sampling_interval_(sampling_interval_ms),
      gpu_id_(gpu_id) {
-#if defined(__linux__) || defined(__ANDROID__)
+#ifdef __linux__
  is_supported_ = true;
 #else
  is_supported_ = false;
@@ -67,7 +65,9 @@
 }
 
 void ResourceUsageMonitor::Start() {
-  if (!is_supported_) return;
+  if (!is_supported_) {
+    return;
+  }
   if (check_memory_thd_ != nullptr) {
     FDINFO << "Memory monitoring has already started!" << std::endl;
     return;
@@ -77,20 +77,24 @@
   check_memory_thd_.reset(new std::thread(([this]() {
     // Note we retrieve the memory usage at the very beginning of the thread.
     while (true) {
-      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
-      // get max_cpu_mem
-      std::vector<std::string> cpu_tokens;
-      split(cpu_mem_info, cpu_tokens, ' ');
-      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#ifdef __linux__
+      rusage res;
+      if (getrusage(RUSAGE_SELF, &res) == 0) {
+        max_cpu_mem_ =
+            std::max(max_cpu_mem_, static_cast<float>(res.ru_maxrss / 1024.0));
+      }
+#endif
 #if defined(WITH_GPU)
       std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
       // get max_gpu_mem and max_gpu_util
       std::vector<std::string> gpu_tokens;
-      split(gpu_mem_info, gpu_tokens, ',');
+      Split(gpu_mem_info, gpu_tokens, ',');
       max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
 #endif
-      if (stop_signal_) break;
+      if (stop_signal_) {
+        break;
+      }
       std::this_thread::sleep_for(
           std::chrono::milliseconds(sampling_interval_));
     }
@@ -121,26 +125,6 @@ void ResourceUsageMonitor::StopInternal() {
   check_memory_thd_.reset(nullptr);
 }
 
-std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
-  std::string result = "";
-#if defined(__linux__) || defined(__ANDROID__)
-  int iPid = static_cast<int>(getpid());
-  std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
-  FILE* pp = popen(command.data(), "r");
-  if (!pp) return "";
-  char tmp[1024];
-
-  while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    result += tmp;
-  }
-  pclose(pp);
-#else
-  FDASSERT(false,
-           "Currently collect cpu memory info only supports Linux and ANDROID.")
-#endif
-  return result;
-}
-
 std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h
index f81cb29c1..4037cd09c 100755
--- a/fastdeploy/benchmark/utils.h
+++ b/fastdeploy/benchmark/utils.h
@@ -65,20 +65,26 @@
  private:
   void StopInternal();
-  // Get current cpu memory info
-  std::string GetCurrentCpuMemoryInfo();
   // Get current gpu memory info
   std::string GetCurrentGpuMemoryInfo(int device_id);
 
   bool is_supported_ = false;
   bool stop_signal_ = false;
   const int sampling_interval_;
-  float max_cpu_mem_ = 0.0f;
-  float max_gpu_mem_ = 0.0f;
+  float max_cpu_mem_ = 0.0f;  // MB
+  float max_gpu_mem_ = 0.0f;  // MB
   float max_gpu_util_ = 0.0f;
   const int gpu_id_ = 0;
   std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
 };
 
+// Remove the ch characters at both ends of str
+FASTDEPLOY_DECL std::string Strip(const std::string& str, char ch = ' ');
+
+// Split string
+FASTDEPLOY_DECL void Split(const std::string& s,
+                           std::vector<std::string>& tokens,
+                           char delim = ' ');
+
 }  // namespace benchmark
 }  // namespace fastdeploy
diff --git a/fastdeploy/pybind/fd_tensor.cc b/fastdeploy/pybind/fd_tensor.cc
index 0f1d145b3..6e34019f5 100644
--- a/fastdeploy/pybind/fd_tensor.cc
+++ b/fastdeploy/pybind/fd_tensor.cc
@@ -15,9 +15,9 @@
 #include
 
 #include "fastdeploy/core/fd_type.h"
-#include "fastdeploy/utils/utils.h"
 #include "fastdeploy/fastdeploy_model.h"
 #include "fastdeploy/pybind/main.h"
+#include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
 
@@ -68,8 +68,8 @@ DLDataType FDToDlpackType(FDDataType fd_dtype) {
       break;
 
     default:
-      FDASSERT(false,
-              "Convert to DlPack, FDType \"%s\" is not supported.", Str(fd_dtype).c_str());
+      FDASSERT(false, "Convert to DlPack, FDType \"%s\" is not supported.",
+               Str(fd_dtype).c_str());
   }
 
   dl_dtype.code = dl_code;
@@ -77,10 +77,8 @@ DLDataType FDToDlpackType(FDDataType fd_dtype) {
   return dl_dtype;
 }
 
-FDDataType
-DlpackToFDType(const DLDataType& data_type) {
-  FDASSERT(data_type.lanes == 1,
-           "FDTensor does not support dlpack lanes != 1")
+FDDataType DlpackToFDType(const DLDataType& data_type) {
+  FDASSERT(data_type.lanes == 1, "FDTensor does not support dlpack lanes != 1")
 
   if (data_type.code == DLDataTypeCode::kDLFloat) {
     if (data_type.bits == 16) {
@@ -152,7 +150,7 @@ pybind11::capsule FDTensorToDLPack(FDTensor& fd_tensor) {
   dlpack_tensor->dl_tensor.dtype = FDToDlpackType(fd_tensor.dtype);
 
   dlpack_tensor->dl_tensor.device.device_id = fd_tensor.device_id;
-  if(fd_tensor.device == Device::GPU) {
+  if (fd_tensor.device == Device::GPU) {
     if (fd_tensor.is_pinned_memory) {
       dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDAHost;
     } else {
@@ -162,8 +160,8 @@ pybind11::capsule FDTensorToDLPack(FDTensor& fd_tensor) {
     dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU;
   }
 
-  return pybind11::capsule(
-      static_cast<void*>(dlpack_tensor), "dltensor", &DeleteUnusedDltensor);
+  return pybind11::capsule(static_cast<void*>(dlpack_tensor), "dltensor",
+                           &DeleteUnusedDltensor);
 }
 
 FDTensor FDTensorFromDLPack(const std::string& name,
@@ -178,9 +176,8 @@
   int64_t* strides = dl_managed_tensor->dl_tensor.strides;
 
   int ndim = dl_managed_tensor->dl_tensor.ndim;
-  std::vector<int64_t> dims(
-      dl_managed_tensor->dl_tensor.shape,
-      dl_managed_tensor->dl_tensor.shape + ndim);
+  std::vector<int64_t> dims(dl_managed_tensor->dl_tensor.shape,
+                            dl_managed_tensor->dl_tensor.shape + ndim);
 
   // Check if the input is contiguous and in C order
   if (strides != nullptr) {
@@ -196,8 +193,8 @@
     }
 
     FDASSERT(is_contiguous_c_order,
-        "DLPack tensor is not contiguous. Only contiguous DLPack "
-        "tensors that are stored in C-Order are supported.");
+             "DLPack tensor is not contiguous. Only contiguous DLPack "
+             "tensors that are stored in C-Order are supported.");
   }
 
   Device device;
@@ -216,21 +213,20 @@
       is_pinned_memory = true;
       break;
     default:
-      FDASSERT(false,
-               ("DLDevice type " +
-                std::to_string(dl_managed_tensor->dl_tensor.device.device_type) +
-                " is not support by Python backend.").c_str());
+      FDASSERT(
+          false,
+          ("DLDevice type " +
+           std::to_string(dl_managed_tensor->dl_tensor.device.device_type) +
+           " is not supported by the Python backend.")
+              .c_str());
       break;
   }
 
-  FDDataType dtype =
-      DlpackToFDType(dl_managed_tensor->dl_tensor.dtype);
+  FDDataType dtype = DlpackToFDType(dl_managed_tensor->dl_tensor.dtype);
 
   PyCapsule_SetName(dlpack_tensor.ptr(), "used_dlpack");
   FDTensor fd_tensor(name);
-  fd_tensor.SetExternalData(
-      dims, dtype, memory_ptr, device, device_id
-  );
+  fd_tensor.SetExternalData(dims, dtype, memory_ptr, device, device_id);
   fd_tensor.is_pinned_memory = is_pinned_memory;
   return fd_tensor;
 }
@@ -242,15 +238,52 @@ void BindFDTensor(pybind11::module& m) {
       .def_readonly("shape", &FDTensor::shape)
       .def_readonly("dtype", &FDTensor::dtype)
       .def_readonly("device", &FDTensor::device)
-      .def("numpy", [](FDTensor& self) {
-        return TensorToPyArray(self);
-      })
+      .def("numpy", [](FDTensor& self) { return TensorToPyArray(self); })
       .def("data", &FDTensor::MutableData)
-      .def("from_numpy", [](FDTensor& self, pybind11::array& pyarray, bool share_buffer = false) {
-        PyArrayToTensor(pyarray, &self, share_buffer);
-      })
+      .def("from_numpy",
+           [](FDTensor& self, pybind11::array& pyarray,
+              bool share_buffer = false) {
+             PyArrayToTensor(pyarray, &self, share_buffer);
+           })
+      .def("from_external_data",
+           [](const std::string& name, size_t data_addr,
+              const std::vector<int64_t>& shape, const std::string& data_type,
+              const std::string& data_place, int device_id) {
+             auto fd_data_type = FDDataType::UNKNOWN1;
+             if (data_type == "FP32") {
+               fd_data_type = FDDataType::FP32;
+             } else if (data_type == "FP16") {
+               fd_data_type = FDDataType::FP16;
+             } else if (data_type == "INT32") {
+               fd_data_type = FDDataType::INT32;
+             } else if (data_type == "INT64") {
+               fd_data_type = FDDataType::INT64;
+             } else {
+               FDASSERT(false,
+                        "FDTensor.from_external_data, datatype \"%s\" is not "
+                        "supported.",
+                        data_type.c_str());
+             }
+
+             Device fd_data_place;
+             if (data_place.find("gpu") != data_place.npos) {
+               fd_data_place = Device::GPU;
+             } else {
+               FDASSERT(false,
+                        ("Device type " + data_place +
+                         " is not supported by FDTensor.from_external_data.")
+                            .c_str());
+             }
+             void* data_ptr = nullptr;
+             data_ptr = reinterpret_cast<void*>(data_addr);
+             FDTensor fd_tensor(name);
+             fd_tensor.SetExternalData(shape, fd_data_type,
+                                       static_cast<void*>(data_ptr),
+                                       fd_data_place, device_id);
+             return fd_tensor;
+           })
       .def("to_dlpack", &FDTensorToDLPack)
-      .def("from_dlpack",&FDTensorFromDLPack)
+      .def("from_dlpack", &FDTensorFromDLPack)
      .def("print_info", &FDTensor::PrintInfo);
 }
diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
index ca2f4886b..408c3ced2 100644
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -110,6 +110,7 @@ void BindRuntime(pybind11::module& m) {
         return outputs;
       })
       .def("bind_input_tensor", &Runtime::BindInputTensor)
+      .def("bind_output_tensor", &Runtime::BindOutputTensor)
       .def("infer", [](Runtime& self) { self.Infer(); })
       .def("get_output_tensor",
           [](Runtime& self, const std::string& name) {
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index 4df109991..09dbe812a 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -25,6 +25,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
   if (option.device == Device::GPU) {
     config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id);
     if (option_.external_stream_) {
+      FDINFO << "Will use external stream for Paddle Backend." << std::endl;
       config_.SetExecStream(option_.external_stream_);
     }
     if (option.enable_trt) {
@@ -226,23 +227,47 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
            << inputs_desc_.size() << ")." << std::endl;
    return false;
  }
+  // Output buffers can only be shared on CPU or GPU; IPU must copy.
+  if (option_.device == Device::IPU) {
+    copy_to_fd = true;
+  }

  RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto handle = predictor_->GetInputHandle(inputs[i].name);
    ShareTensorFromFDTensor(handle.get(), inputs[i]);
  }
+  std::unordered_set<std::string> prebinded_output_name;
+  // Pre-bound outputs are only supported on GPU.
+  if (!copy_to_fd) {
+    for (size_t i = 0; i < (*outputs).size(); ++i) {
+      auto output_name = (*outputs)[i].name;
+      // If an output is not pre-bound,
+      // its name is expected to be empty,
+      // so we skip it here.
+      if (output_name.empty()) {
+        continue;
+      }
+      // Record the pre-bound output names; these outputs
+      // do not need PaddleTensorToFDTensor
+      // after predictor_->Run().
+      prebinded_output_name.insert(output_name);
+      auto handle = predictor_->GetOutputHandle(output_name);
+      ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
+    }
+  }

  RUNTIME_PROFILE_LOOP_BEGIN(1)
  predictor_->Run();
  RUNTIME_PROFILE_LOOP_END

-  // output share backend memory only support CPU or GPU
-  if (option_.device == Device::IPU) {
-    copy_to_fd = true;
-  }
  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+    // Skip pre-bound outputs.
+    if (copy_to_fd == false &&
+        prebinded_output_name.count(outputs_desc_[i].name)) {
+      continue;
+    }
    auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
    if (copy_to_fd) {
      (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.h b/fastdeploy/runtime/backends/paddle/paddle_backend.h
index 02c430ade..60079fed6 100755
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.h
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h
@@ -35,6 +35,9 @@ paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device);
 // Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor
 void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);

+// Share the output memory buffer of an FDTensor with paddle_infer::Tensor
+void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor,
+                                FDTensor& fd_tensor);
+
 // convert paddle_infer::Tensor to fastdeploy::FDTensor
 // if copy_to_fd is true, copy memory data to FDTensor
 /// else share memory to FDTensor
@@ -89,4 +92,4 @@ class PaddleBackend : public BaseBackend {
  std::vector<TensorInfo> inputs_desc_;
  std::vector<TensorInfo> outputs_desc_;
};
-}  // namespace fastdeploy
\ No newline at end of file
+}  // namespace fastdeploy
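Taken together with Runtime::BindOutputTensor (added later in this patch), the pre-bound path above enables fully zero-copy inference: outputs bound before Run() are shared with the predictor, and the copy-back loop skips them. A hedged C++ sketch of the caller side, assuming a GPU model with one input "x" and one output "out" and caller-allocated device buffers (names and shapes are illustrative):

// Sketch of the zero-copy flow: both buffers are caller-owned GPU memory.
#include "fastdeploy/runtime/runtime.h"

void ZeroCopyInfer(fastdeploy::Runtime& runtime, void* in_gpu, void* out_gpu) {
  fastdeploy::FDTensor input("x"), output("out");
  input.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                        in_gpu, fastdeploy::Device::GPU, 0);
  output.SetExternalData({1, 1000}, fastdeploy::FDDataType::FP32, out_gpu,
                         fastdeploy::Device::GPU, 0);

  runtime.BindInputTensor("x", input);
  runtime.BindOutputTensor("out", output);  // pre-bind: no copy after Run()
  runtime.Infer();  // results are written directly into out_gpu
}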
diff --git a/fastdeploy/runtime/backends/paddle/util.cc b/fastdeploy/runtime/backends/paddle/util.cc
index f117a49bc..bd7ff0944 100644
--- a/fastdeploy/runtime/backends/paddle/util.cc
+++ b/fastdeploy/runtime/backends/paddle/util.cc
@@ -61,6 +61,43 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor,
            Str(fd_tensor.dtype).c_str());
 }

+void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor,
+                                FDTensor& fd_tensor) {
+  std::vector<int> shape(fd_tensor.shape.begin(), fd_tensor.shape.end());
+  auto place = ConvertFDDeviceToPlace(fd_tensor.device);
+  if (fd_tensor.dtype == FDDataType::FP32) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<float*>(fd_tensor.MutableData()),
+                                shape, place);
+    } else {
+      tensor->CopyToCpu(static_cast<float*>(fd_tensor.MutableData()));
+    }
+    return;
+  } else if (fd_tensor.dtype == FDDataType::INT32) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<int32_t*>(fd_tensor.MutableData()),
+                                shape, place);
+    } else {
+      tensor->CopyToCpu(static_cast<int32_t*>(fd_tensor.MutableData()));
+    }
+    return;
+  } else if (fd_tensor.dtype == FDDataType::INT64) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<int64_t*>(fd_tensor.MutableData()),
+                                shape, place);
+    } else {
+      tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor.MutableData()));
+    }
+    return;
+  } else if (fd_tensor.dtype == FDDataType::UINT8) {
+    tensor->ShareExternalData(static_cast<uint8_t*>(fd_tensor.MutableData()),
+                              shape, paddle_infer::PlaceType::kCPU);
+    return;
+  }
+  FDASSERT(false, "Unexpected data type (%s) while inferring with PaddleBackend.",
+           Str(fd_tensor.dtype).c_str());
+}
+
 void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
                             FDTensor* fd_tensor, bool copy_to_fd) {
   auto fd_dtype = PaddleDataTypeToFD(tensor->type());
ret=" << ret << std::endl; diff --git a/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h b/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h index 8b3aa9fe2..52f174d6d 100644 --- a/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h +++ b/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h @@ -25,7 +25,7 @@ namespace fastdeploy { struct RKNPU2BackendOption { - rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588; + rknpu2::CpuName cpu_name = rknpu2::CpuName::RK356X; // The specification of NPU core setting.It has the following choices : // RKNN_NPU_CORE_AUTO : Referring to automatic mode, meaning that it will @@ -49,7 +49,7 @@ class RKNPU2Backend : public BaseBackend { bool GetSDKAndDeviceVersion(); - bool SetCoreMask(rknpu2::CoreMask& core_mask) const; + bool SetCoreMask(const rknpu2::CoreMask& core_mask); bool GetModelInputOutputInfos(); diff --git a/fastdeploy/runtime/option_pybind.cc b/fastdeploy/runtime/option_pybind.cc index 7af90d831..e5443d894 100644 --- a/fastdeploy/runtime/option_pybind.cc +++ b/fastdeploy/runtime/option_pybind.cc @@ -49,6 +49,10 @@ void BindOption(pybind11::module& m) { .def_readwrite("poros_option", &RuntimeOption::poros_option) .def_readwrite("paddle_infer_option", &RuntimeOption::paddle_infer_option) .def("set_external_stream", &RuntimeOption::SetExternalStream) + .def("set_external_raw_stream", + [](RuntimeOption& self, size_t external_stream) { + self.SetExternalStream(reinterpret_cast(external_stream)); + }) .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) .def("use_poros_backend", &RuntimeOption::UsePorosBackend) diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc index 67774a306..ad84f7261 100644 --- a/fastdeploy/runtime/runtime.cc +++ b/fastdeploy/runtime/runtime.cc @@ -224,6 +224,25 @@ void Runtime::BindInputTensor(const std::string& name, FDTensor& input) { } } +void Runtime::BindOutputTensor(const std::string& name, FDTensor& output) { + bool is_exist = false; + for (auto& t : output_tensors_) { + if (t.name == name) { + FDINFO << "The output name [" << name << "] is exist." << std::endl; + is_exist = true; + t.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + break; + } + } + if (!is_exist) { + FDINFO << "The output name [" << name << "] is prebinded added into output tensor list." << std::endl; + FDTensor new_tensor(name); + new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + output_tensors_.emplace_back(std::move(new_tensor)); + } +} FDTensor* Runtime::GetOutputTensor(const std::string& name) { for (auto& t : output_tensors_) { if (t.name == name) { diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h index fa8b8f198..4d045684e 100755 --- a/fastdeploy/runtime/runtime.h +++ b/fastdeploy/runtime/runtime.h @@ -75,6 +75,12 @@ struct FASTDEPLOY_DECL Runtime { /** \brief Bind FDTensor by name, no copy and share input memory */ void BindInputTensor(const std::string& name, FDTensor& input); + + /** \brief Bind FDTensor by name, no copy and share output memory. + * Please make share the correctness of tensor shape of output. 
diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h
index fa8b8f198..4d045684e 100755
--- a/fastdeploy/runtime/runtime.h
+++ b/fastdeploy/runtime/runtime.h
@@ -75,6 +75,12 @@ struct FASTDEPLOY_DECL Runtime {
   /** \brief Bind FDTensor by name, no copy and share input memory
    */
   void BindInputTensor(const std::string& name, FDTensor& input);
+
+  /** \brief Bind FDTensor by name, no copy and share output memory.
+   * Please make sure the shape of the bound output tensor is correct.
+   */
+  void BindOutputTensor(const std::string& name, FDTensor& output);
+
   /** \brief Get output FDTensor by name, no copy and share backend output memory
    */
   FDTensor* GetOutputTensor(const std::string& name);
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index a36ac5459..da8e87e25 100755
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -71,9 +71,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
   void UseGpu(int gpu_id = 0);
   /// Use RKNPU2 e.g RK3588/RK356X to inference
   void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
-                     fastdeploy::rknpu2::CpuName::RK3588,
+                     fastdeploy::rknpu2::CpuName::RK356X,
                 fastdeploy::rknpu2::CoreMask rknpu2_core =
-                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO);
   /// Use TimVX e.g RV1126/A311D to inference
   void UseTimVX();
   /// Use Huawei Ascend to inference
@@ -110,6 +110,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
                 bool enable_multi_stream = false);

   void SetExternalStream(void* external_stream);
+
   /*
    * @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends
    */
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index a9004a15a..6be764ea3 100644
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -72,6 +72,14 @@ class Runtime:
         """
         self._runtime.bind_input_tensor(name, fdtensor)

+    def bind_output_tensor(self, name, fdtensor):
+        """Bind FDTensor by name, no copy and share output memory
+
+        :param name: (str) The name of the output data.
+        :param fdtensor: (fastdeploy.FDTensor) The output FDTensor.
+        """
+        self._runtime.bind_output_tensor(name, fdtensor)
+
     def zero_copy_infer(self):
         """No params inference the model.

@@ -657,6 +665,11 @@ class RuntimeOption:
         """
         return self._option.disable_profiling()

+    def set_external_raw_stream(self, cuda_stream):
+        """Set the external raw stream used by the FastDeploy runtime.
+        """
+        self._option.set_external_raw_stream(cuda_stream)
+
     def __repr__(self):
         attrs = dir(self._option)
         message = "RuntimeOption(\n"
diff --git a/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml b/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml
index aeeb3fdb7..ddcd9c91e 100644
--- a/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml
+++ b/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml
@@ -10,7 +10,7 @@ std:
   - 57.375
 model_path: ./picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx
 outputs_nodes:
-  - 'p2o.Div.79'
+  - 'p2o.Mul.179'
   - 'p2o.Concat.9'
 do_quantization: False
 dataset:
diff --git a/tools/rknpu2/config/ppyoloe_plus_crn_s_80e_coco_quantized.yaml b/tools/rknpu2/config/ppyoloe_plus_crn_s_80e_coco_quantized.yaml
new file mode 100644
index 000000000..965e43cde
--- /dev/null
+++ b/tools/rknpu2/config/ppyoloe_plus_crn_s_80e_coco_quantized.yaml
@@ -0,0 +1,17 @@
+mean:
+  -
+    - 0
+    - 0
+    - 0
+std:
+  -
+    - 255
+    - 255
+    - 255
+model_path: ./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco.onnx
+outputs_nodes:
+  - 'p2o.Mul.224'
+  - 'p2o.Concat.29'
+do_quantization: True
+dataset: "./ppyoloe_plus_crn_s_80e_coco/dataset.txt"
+output_folder: "./ppyoloe_plus_crn_s_80e_coco"
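One behavioral note on the RKNPU2 default changes above: UseRKNPU2() with no arguments now selects RK356X with automatic core selection instead of RK3588 with core 0, so existing RK3588 callers must pass the old values explicitly. A sketch (only the option setup is shown):

// New defaults: RK356X + RKNN_NPU_CORE_AUTO.
#include "fastdeploy/runtime/runtime_option.h"

void ConfigureRknpu2() {
  fastdeploy::RuntimeOption option;
  option.UseRKNPU2();  // now equivalent to (RK356X, RKNN_NPU_CORE_AUTO)

  // RK3588 with a pinned NPU core must now be requested explicitly:
  fastdeploy::RuntimeOption option_rk3588;
  option_rk3588.UseRKNPU2(fastdeploy::rknpu2::CpuName::RK3588,
                          fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
}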