From ef852579a9a63fdea53efc2c765b80e787d74bd7 Mon Sep 17 00:00:00 2001 From: yeliang2258 <30516196+yeliang2258@users.noreply.github.com> Date: Thu, 9 Feb 2023 10:04:04 +0800 Subject: [PATCH 01/14] [Bug Fix] Fix bugs in acc eval (#1276) fix bugs in acc eval --- tests/acc_eval/classification/run.sh | 2 +- tests/acc_eval/detection/eval_yolov5.py | 4 ++-- tests/acc_eval/detection/eval_yolov6.py | 4 ++-- tests/acc_eval/detection/eval_yolov7.py | 4 ++-- tests/acc_eval/detection/run.sh | 6 +++--- tests/acc_eval/ppocr/eval_ppocrv2.py | 2 +- tests/acc_eval/ppocr/eval_ppocrv3.py | 2 +- tests/acc_eval/segmentation/eval.py | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) mode change 100644 => 100755 tests/acc_eval/classification/run.sh mode change 100644 => 100755 tests/acc_eval/detection/run.sh mode change 100644 => 100755 tests/acc_eval/ppocr/eval_ppocrv2.py mode change 100644 => 100755 tests/acc_eval/ppocr/eval_ppocrv3.py mode change 100644 => 100755 tests/acc_eval/segmentation/eval.py diff --git a/tests/acc_eval/classification/run.sh b/tests/acc_eval/classification/run.sh old mode 100644 new mode 100755 index 16c1b2bb9..73fe957d4 --- a/tests/acc_eval/classification/run.sh +++ b/tests/acc_eval/classification/run.sh @@ -4,5 +4,5 @@ model_dir=`ls ./models/` for MODEL_NAME in $model_dir do - python infer.py --model ./models/$MODEL_NAME --image None --device $TARGET_DEVICE 2>&1 | tee ./log/${MODEL_NAME}_acc.log + python eval.py --model ./models/$MODEL_NAME --image None --device $TARGET_DEVICE 2>&1 | tee ./log/${MODEL_NAME}_acc.log done diff --git a/tests/acc_eval/detection/eval_yolov5.py b/tests/acc_eval/detection/eval_yolov5.py index 3d950b26a..f4aecbdc1 100755 --- a/tests/acc_eval/detection/eval_yolov5.py +++ b/tests/acc_eval/detection/eval_yolov5.py @@ -52,8 +52,8 @@ model = fd.vision.detection.YOLOv5( runtime_option=runtime_option, model_format=fd.ModelFormat.PADDLE) -image_file_path = "/xieyunyao/Project/coco/val2017" -annotation_file_path = "/xieyunyao/Project/coco/annotations/instances_val2017.json" +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" res = fd.vision.evaluation.eval_detection(model, image_file_path, annotation_file_path, 0.001, 0.65) diff --git a/tests/acc_eval/detection/eval_yolov6.py b/tests/acc_eval/detection/eval_yolov6.py index 3641194ca..3992c9f53 100755 --- a/tests/acc_eval/detection/eval_yolov6.py +++ b/tests/acc_eval/detection/eval_yolov6.py @@ -52,8 +52,8 @@ model = fd.vision.detection.YOLOv6( runtime_option=runtime_option, model_format=fd.ModelFormat.PADDLE) -image_file_path = "/xieyunyao/Project/coco/val2017" -annotation_file_path = "/xieyunyao/Project/coco/annotations/instances_val2017.json" +image_file_path = "../dataset/coco/val2017" +annotation_file_path = "../dataset/coco/annotations/instances_val2017.json" res = fd.vision.evaluation.eval_detection(model, image_file_path, annotation_file_path, 0.001, 0.65) diff --git a/tests/acc_eval/detection/eval_yolov7.py b/tests/acc_eval/detection/eval_yolov7.py index 3641194ca..3992c9f53 100755 --- a/tests/acc_eval/detection/eval_yolov7.py +++ b/tests/acc_eval/detection/eval_yolov7.py @@ -52,8 +52,8 @@ model = fd.vision.detection.YOLOv6( runtime_option=runtime_option, model_format=fd.ModelFormat.PADDLE) -image_file_path = "/xieyunyao/Project/coco/val2017" -annotation_file_path = "/xieyunyao/Project/coco/annotations/instances_val2017.json" +image_file_path = "../dataset/coco/val2017" +annotation_file_path = 
"../dataset/coco/annotations/instances_val2017.json" res = fd.vision.evaluation.eval_detection(model, image_file_path, annotation_file_path, 0.001, 0.65) diff --git a/tests/acc_eval/detection/run.sh b/tests/acc_eval/detection/run.sh old mode 100644 new mode 100755 index 59dff2e9b..051663215 --- a/tests/acc_eval/detection/run.sh +++ b/tests/acc_eval/detection/run.sh @@ -12,6 +12,6 @@ python eval_yolov3.py --model_dir ./models/yolov3_darknet53_270e_coco --image python eval_yolox.py --model_dir ./models/yolox_s_300e_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolox_s_300e_coco.log python eval_faster_rcnn.py --model_dir ./models/faster_rcnn_r50_vd_fpn_2x_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/faster_rcnn_r50_vd_fpn_2x_coco.log python eval_mask_rcnn.py --model_dir ./models/mask_rcnn_r50_1x_coco --image None --device $TARGET_DEVICE 2>&1 | tee ./log/mask_rcnn_r50_1x_coco.log -python eval_yolov5.py --model_dir ./models/yolov5s_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov5s_infer.log -python eval_yolov6.py --model_dir ./models/yolov6s_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov6s_infer.log -python eval_yolov5.py --model_dir ./models/yolov7_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov7_infer.log +python eval_yolov5.py --model ./models/yolov5s_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov5s_infer.log +python eval_yolov6.py --model ./models/yolov6s_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov6s_infer.log +python eval_yolov7.py --model ./models/yolov7_infer --image None --device $TARGET_DEVICE 2>&1 | tee ./log/yolov7_infer.log diff --git a/tests/acc_eval/ppocr/eval_ppocrv2.py b/tests/acc_eval/ppocr/eval_ppocrv2.py old mode 100644 new mode 100755 index f4742df66..bb478db91 --- a/tests/acc_eval/ppocr/eval_ppocrv2.py +++ b/tests/acc_eval/ppocr/eval_ppocrv2.py @@ -103,7 +103,7 @@ rec_model = fd.vision.ocr.Recognizer( runtime_option=runtime_option) # PPOCR的Rec模型开启静态推理, 其他硬件不需要的话请注释掉. -rec_model.preprocessor.static_shape = True +rec_model.preprocessor.static_shape_infer = True # 创建PP-OCR,串联3个模型,其中cls_model可选,如无需求,可设置为None ppocr_v2 = fd.vision.ocr.PPOCRv2( diff --git a/tests/acc_eval/ppocr/eval_ppocrv3.py b/tests/acc_eval/ppocr/eval_ppocrv3.py old mode 100644 new mode 100755 index b6f4dcced..496781ba0 --- a/tests/acc_eval/ppocr/eval_ppocrv3.py +++ b/tests/acc_eval/ppocr/eval_ppocrv3.py @@ -103,7 +103,7 @@ rec_model = fd.vision.ocr.Recognizer( runtime_option=runtime_option) # PPOCR的Rec模型开启静态推理, 其他硬件不需要的话请注释掉. 
-rec_model.preprocessor.static_shape = True +rec_model.preprocessor.static_shape_infer = True # 创建PP-OCR,串联3个模型,其中cls_model可选,如无需求,可设置为None ppocr_v3 = fd.vision.ocr.PPOCRv3( diff --git a/tests/acc_eval/segmentation/eval.py b/tests/acc_eval/segmentation/eval.py old mode 100644 new mode 100755 index b77a69519..df0dc0aa8 --- a/tests/acc_eval/segmentation/eval.py +++ b/tests/acc_eval/segmentation/eval.py @@ -54,5 +54,5 @@ model = fd.vision.segmentation.PaddleSegModel( model_file, params_file, config_file, runtime_option=runtime_option) res = fd.vision.evaluation.eval_segmentation( - model=model, data_dir="../dataset/FD_dataset/data/cityscapes") + model=model, data_dir="../dataset/cityscapes") print(res) From b8afb0d04015495bcdd75f95d96fd06f46994c2e Mon Sep 17 00:00:00 2001 From: Jason Date: Thu, 9 Feb 2023 10:04:18 +0800 Subject: [PATCH 02/14] [Other] Move comments for deprecated functions (#1275) Move comments for deprecated functions --- fastdeploy/runtime/backends/paddle/option.h | 10 + .../runtime/backends/paddle/option_pybind.cc | 3 +- fastdeploy/runtime/runtime_option.cc | 10 - fastdeploy/runtime/runtime_option.h | 345 +++++------------- python/fastdeploy/runtime.py | 3 +- 5 files changed, 104 insertions(+), 267 deletions(-) diff --git a/fastdeploy/runtime/backends/paddle/option.h b/fastdeploy/runtime/backends/paddle/option.h index 29556f877..749a35705 100644 --- a/fastdeploy/runtime/backends/paddle/option.h +++ b/fastdeploy/runtime/backends/paddle/option.h @@ -75,6 +75,16 @@ struct PaddleBackendOption { delete_pass_names.push_back(pass_name); } + void SetIpuConfig(bool enable_fp16, int replica_num, + float available_memory_proportion, + bool enable_half_partial) { + ipu_option.ipu_enable_fp16 = enable_fp16; + ipu_option.ipu_replica_num = replica_num; + ipu_option.ipu_available_memory_proportion = + available_memory_proportion; + ipu_option.ipu_enable_half_partial = enable_half_partial; + } + // The belowing parameters may be removed, please do not // read or write them directly TrtBackendOption trt_option; diff --git a/fastdeploy/runtime/backends/paddle/option_pybind.cc b/fastdeploy/runtime/backends/paddle/option_pybind.cc index 5e2eb06c7..50b34ca61 100644 --- a/fastdeploy/runtime/backends/paddle/option_pybind.cc +++ b/fastdeploy/runtime/backends/paddle/option_pybind.cc @@ -47,7 +47,8 @@ void BindPaddleOption(pybind11::module& m) { .def_readwrite("gpu_mem_init_size", &PaddleBackendOption::gpu_mem_init_size) .def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps) - .def("delete_pass", &PaddleBackendOption::DeletePass); + .def("delete_pass", &PaddleBackendOption::DeletePass) + .def("set_ipu_config", &PaddleBackendOption::SetIpuConfig); } } // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc index 7538f3ea6..c09352d58 100644 --- a/fastdeploy/runtime/runtime_option.cc +++ b/fastdeploy/runtime/runtime_option.cc @@ -458,14 +458,4 @@ void RuntimeOption::UseIpu(int device_num, int micro_batch_size, #endif } -void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num, - float available_memory_proportion, - bool enable_half_partial) { - paddle_infer_option.ipu_option.ipu_enable_fp16 = enable_fp16; - paddle_infer_option.ipu_option.ipu_replica_num = replica_num; - paddle_infer_option.ipu_option.ipu_available_memory_proportion = - available_memory_proportion; - paddle_infer_option.ipu_option.ipu_enable_half_partial = enable_half_partial; -} - } // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime_option.h 
b/fastdeploy/runtime/runtime_option.h index ecb51fe2a..0aa6bbec8 100644 --- a/fastdeploy/runtime/runtime_option.h +++ b/fastdeploy/runtime/runtime_option.h @@ -61,22 +61,19 @@ struct FASTDEPLOY_DECL RuntimeOption { /// Use cpu to inference, the runtime will inference on CPU by default void UseCpu(); - /// Use Nvidia GPU to inference void UseGpu(int gpu_id = 0); - + /// Use RKNPU2 e.g RK3588/RK356X to inference void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = fastdeploy::rknpu2::CpuName::RK3588, fastdeploy::rknpu2::CoreMask rknpu2_core = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0); - - /// Use TimVX to inference + /// Use TimVX e.g RV1126/A311D to inference void UseTimVX(); - /// Use Huawei Ascend to inference void UseAscend(); - - /// + /// Use Sophgo to inference + void UseSophgo(); /// \brief Turn on KunlunXin XPU. /// /// \param kunlunxin_id the KunlunXin XPU card to use (default is 0). @@ -106,221 +103,25 @@ struct FASTDEPLOY_DECL RuntimeOption { bool adaptive_seqlen = false, bool enable_multi_stream = false); - /// Use Sophgo to inference - void UseSophgo(); - void SetExternalStream(void* external_stream); - /* * @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends */ void SetCpuThreadNum(int thread_num); - - /// Set ORT graph opt level, default is decide by ONNX Runtime itself - void SetOrtGraphOptLevel(int level = -1); - /// Set Paddle Inference as inference backend, support CPU/GPU - void UsePaddleBackend(); - - /// Wrapper function of UsePaddleBackend() void UsePaddleInferBackend() { return UsePaddleBackend(); } - /// Set ONNX Runtime as inference backend, support CPU/GPU void UseOrtBackend(); - - /// Set SOPHGO Runtime as inference backend, support CPU/GPU + /// Set SOPHGO Runtime as inference backend, support SOPHGO void UseSophgoBackend(); - /// Set TensorRT as inference backend, only support GPU void UseTrtBackend(); - /// Set Poros backend as inference backend, support CPU/GPU void UsePorosBackend(); - /// Set OpenVINO as inference backend, only support CPU void UseOpenVINOBackend(); - /// Set Paddle Lite as inference backend, only support arm cpu - void UseLiteBackend(); - - /// Wrapper function of UseLiteBackend() void UsePaddleLiteBackend() { return UseLiteBackend(); } - - /// Set mkldnn switch while using Paddle Inference as inference backend - void SetPaddleMKLDNN(bool pd_mkldnn = true); - - /* - * @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead. - */ - void EnablePaddleToTrt(); - - /** - * @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes - */ - void DeletePaddleBackendPass(const std::string& delete_pass_name); - - /** - * @brief Enable print debug information while using Paddle Inference as inference backend, the backend disable the debug information by default - */ - void EnablePaddleLogInfo(); - - /** - * @brief Disable print debug information while using Paddle Inference as inference backend - */ - void DisablePaddleLogInfo(); - - /** - * @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the difference shape - */ - void SetPaddleMKLDNNCacheSize(int size); - - /** - * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'.... 
- */ - void SetOpenVINODevice(const std::string& name = "CPU"); - - /** - * @brief Set shape info for OpenVINO - */ - void SetOpenVINOShapeInfo( - const std::map>& shape_info) { - openvino_option.shape_infos = shape_info; - } - - /** - * @brief While use OpenVINO backend with intel GPU, use this interface to specify operators run on CPU - */ - void SetOpenVINOCpuOperators(const std::vector& operators) { - openvino_option.SetCpuOperators(operators); - } - - /** - * @brief Set optimzed model dir for Paddle Lite backend. - */ - void SetLiteOptimizedModelDir(const std::string& optimized_model_dir); - - /** - * @brief Set subgraph partition path for Paddle Lite backend. - */ - void SetLiteSubgraphPartitionPath( - const std::string& nnadapter_subgraph_partition_config_path); - - /** - * @brief Set subgraph partition path for Paddle Lite backend. - */ - void SetLiteSubgraphPartitionConfigBuffer( - const std::string& nnadapter_subgraph_partition_config_buffer); - - /** - * @brief Set context properties for Paddle Lite backend. - */ - void - SetLiteContextProperties(const std::string& nnadapter_context_properties); - - /** - * @brief Set model cache dir for Paddle Lite backend. - */ - void SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir); - - /** - * @brief Set dynamic shape info for Paddle Lite backend. - */ - void SetLiteDynamicShapeInfo( - const std::map>>& - nnadapter_dynamic_shape_info); - - /** - * @brief Set mixed precision quantization config path for Paddle Lite backend. - */ - void SetLiteMixedPrecisionQuantizationConfigPath( - const std::string& nnadapter_mixed_precision_quantization_config_path); - - /** - * @brief enable half precision while use paddle lite backend - */ - void EnableLiteFP16(); - - /** - * @brief disable half precision, change to full precision(float32) - */ - void DisableLiteFP16(); - - /** - * @brief enable int8 precision while use paddle lite backend - */ - void EnableLiteInt8(); - - /** - * @brief disable int8 precision, change to full precision(float32) - */ - void DisableLiteInt8(); - - /** - * @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details) - */ - void SetLitePowerMode(LitePowerMode mode); - - /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend - * - * \param[in] input_name The name of input for the model which is dynamic shape - * \param[in] min_shape The minimal shape for the input tensor - * \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as default value, it will keep same with min_shape - * \param[in] max_shape The maximum shape for the input tensor, if set as default value, it will keep same with min_shape - */ - void SetTrtInputShape( - const std::string& input_name, const std::vector& min_shape, - const std::vector& opt_shape = std::vector(), - const std::vector& max_shape = std::vector()); - - /// Set max_workspace_size for TensorRT, default 1<<30 - void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size); - - /// Set max_batch_size for TensorRT, default 32 - void SetTrtMaxBatchSize(size_t max_batch_size); - - /** - * @brief Enable FP16 inference while using TensorRT backend. 
Notice: not all the GPU device support FP16, on those device doesn't support FP16, FastDeploy will fallback to FP32 automaticly - */ - void EnableTrtFP16(); - - /// Disable FP16 inference while using TensorRT backend - void DisableTrtFP16(); - - /** - * @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again - */ - void SetTrtCacheFile(const std::string& cache_file_path); - - /** - * @brief Enable pinned memory. Pinned memory can be utilized to speedup the data transfer between CPU and GPU. Currently it's only suppurted in TRT backend and Paddle Inference backend. - */ - void EnablePinnedMemory(); - - /** - * @brief Disable pinned memory - */ - void DisablePinnedMemory(); - - /** - * @brief Enable to collect shape in paddle trt backend - */ - void EnablePaddleTrtCollectShape(); - - /** - * @brief Disable to collect shape in paddle trt backend - */ - void DisablePaddleTrtCollectShape(); - - /** - * @brief Prevent ops running in paddle trt backend - */ - void DisablePaddleTrtOPs(const std::vector& ops); - - /* - * @brief Set number of streams by the OpenVINO backends - */ - void SetOpenVINOStreams(int num_streams); - /** \Use Graphcore IPU to inference. * * \param[in] device_num the number of IPUs. @@ -331,16 +132,18 @@ struct FASTDEPLOY_DECL RuntimeOption { void UseIpu(int device_num = 1, int micro_batch_size = 1, bool enable_pipelining = false, int batches_per_step = 1); - /** \brief Set IPU config. - * - * \param[in] enable_fp16 enable fp16. - * \param[in] replica_num the number of graph replication. - * \param[in] available_memory_proportion the available memory proportion for matmul/conv. - * \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16. - */ - void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1, - float available_memory_proportion = 1.0, - bool enable_half_partial = false); + /// Option to configure ONNX Runtime backend + OrtBackendOption ort_option; + /// Option to configure TensorRT backend + TrtBackendOption trt_option; + /// Option to configure Paddle Inference backend + PaddleBackendOption paddle_infer_option; + /// Option to configure Poros backend + PorosBackendOption poros_option; + /// Option to configure OpenVINO backend + OpenVINOBackendOption openvino_option; + /// Option to configure Paddle Lite backend + LiteBackendOption paddle_lite_option; /** \brief Set the profile mode as 'true'. 
* @@ -362,46 +165,9 @@ struct FASTDEPLOY_DECL RuntimeOption { benchmark_option.enable_profile = false; } - Backend backend = Backend::UNKNOWN; - // for cpu inference - // default will let the backend choose their own default value - int cpu_thread_num = -1; - int device_id = 0; - - Device device = Device::CPU; - - void* external_stream_ = nullptr; - - bool enable_pinned_memory = false; - - /// Option to configure ONNX Runtime backend - OrtBackendOption ort_option; - - /// Option to configure TensorRT backend - TrtBackendOption trt_option; - - /// Option to configure Paddle Inference backend - PaddleBackendOption paddle_infer_option; - - // ======Only for PaddleTrt Backend======= - std::vector trt_disabled_ops_{}; - - /// Option to configure Poros backend - PorosBackendOption poros_option; - - /// Option to configure OpenVINO backend - OpenVINOBackendOption openvino_option; - - // ======Only for RKNPU2 Backend======= - fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = - fastdeploy::rknpu2::CpuName::RK3588; - fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = - fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO; - - - /// Option to configure Paddle Lite backend - LiteBackendOption paddle_lite_option; + /// Benchmark option + benchmark::BenchmarkOption benchmark_option; // If model_from_memory is true, the model_file and params_file is // binary stream in memory; @@ -412,8 +178,77 @@ struct FASTDEPLOY_DECL RuntimeOption { /// format of input model ModelFormat model_format = ModelFormat::PADDLE; - /// Benchmark option - benchmark::BenchmarkOption benchmark_option; + // for cpu inference + // default will let the backend choose their own default value + int cpu_thread_num = -1; + int device_id = 0; + Backend backend = Backend::UNKNOWN; + + Device device = Device::CPU; + + void* external_stream_ = nullptr; + + bool enable_pinned_memory = false; + + // ======Only for RKNPU2 Backend======= + fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = + fastdeploy::rknpu2::CpuName::RK3588; + fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO; + + // *** The belowing api are deprecated, will be removed in v1.2.0 + // *** Do not use it anymore + + void SetPaddleMKLDNN(bool pd_mkldnn = true); + void EnablePaddleToTrt(); + void DeletePaddleBackendPass(const std::string& delete_pass_name); + void EnablePaddleLogInfo(); + void DisablePaddleLogInfo(); + void SetPaddleMKLDNNCacheSize(int size); + void SetOpenVINODevice(const std::string& name = "CPU"); + void SetOpenVINOShapeInfo( + const std::map>& shape_info) { + openvino_option.shape_infos = shape_info; + } + void SetOpenVINOCpuOperators(const std::vector& operators) { + openvino_option.SetCpuOperators(operators); + } + void SetLiteOptimizedModelDir(const std::string& optimized_model_dir); + void SetLiteSubgraphPartitionPath( + const std::string& nnadapter_subgraph_partition_config_path); + void SetLiteSubgraphPartitionConfigBuffer( + const std::string& nnadapter_subgraph_partition_config_buffer); + void + SetLiteContextProperties(const std::string& nnadapter_context_properties); + void SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir); + void SetLiteDynamicShapeInfo( + const std::map>>& + nnadapter_dynamic_shape_info); + void SetLiteMixedPrecisionQuantizationConfigPath( + const std::string& nnadapter_mixed_precision_quantization_config_path); + void EnableLiteFP16(); + void DisableLiteFP16(); + void EnableLiteInt8(); + void DisableLiteInt8(); + void SetLitePowerMode(LitePowerMode mode); + void 
SetTrtInputShape( + const std::string& input_name, const std::vector& min_shape, + const std::vector& opt_shape = std::vector(), + const std::vector& max_shape = std::vector()); + void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size); + void SetTrtMaxBatchSize(size_t max_batch_size); + void EnableTrtFP16(); + void DisableTrtFP16(); + void SetTrtCacheFile(const std::string& cache_file_path); + void EnablePinnedMemory(); + void DisablePinnedMemory(); + void EnablePaddleTrtCollectShape(); + void DisablePaddleTrtCollectShape(); + void DisablePaddleTrtOPs(const std::vector& ops); + void SetOpenVINOStreams(int num_streams); + void SetOrtGraphOptLevel(int level = -1); + void UsePaddleBackend(); + void UseLiteBackend(); }; } // namespace fastdeploy diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index 47659c98c..cd7b6641b 100644 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -583,7 +583,8 @@ class RuntimeOption: replica_num=1, available_memory_proportion=1.0, enable_half_partial=False): - return self._option.set_ipu_config(enable_fp16, replica_num, + logging.warning("`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead.") + self._option.paddle_infer_option.set_ipu_config(enable_fp16, replica_num, available_memory_proportion, enable_half_partial) From 3eb571a0470b6508b2b0681a420c32a9fbdd9251 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 9 Feb 2023 10:19:40 +0800 Subject: [PATCH 03/14] [C API] Add c api to fastdeploy (#1217) * add c api to test ppclas model * fix * add ppdet * refactor c api code * remove local test make * fix a bug * fix according to review * format file * format file * format file * add types_internal.cc --------- Co-authored-by: heliqi <1101791222@qq.com> --- CMakeLists.txt | 9 + c_api/CMakeLists.txt | 28 + c_api/fastdeploy_capi/fd_common.h | 100 ++++ c_api/fastdeploy_capi/fd_type.h | 67 +++ c_api/fastdeploy_capi/runtime_option.cc | 418 ++++++++++++++ c_api/fastdeploy_capi/runtime_option.h | 517 ++++++++++++++++++ c_api/fastdeploy_capi/types_internal.cc | 63 +++ c_api/fastdeploy_capi/types_internal.h | 70 +++ .../vision/classification/ppcls/model.cc | 53 ++ .../vision/classification/ppcls/model.h | 66 +++ .../vision/detection/ppdet/model.cc | 53 ++ .../vision/detection/ppdet/model.h | 67 +++ c_api/fastdeploy_capi/vision/result.cc | 238 ++++++++ c_api/fastdeploy_capi/vision/result.h | 161 ++++++ c_api/fastdeploy_capi/vision/visualize.cc | 35 ++ c_api/fastdeploy_capi/vision/visualize.h | 36 ++ cmake/summary.cmake | 1 + 17 files changed, 1982 insertions(+) create mode 100644 c_api/CMakeLists.txt create mode 100644 c_api/fastdeploy_capi/fd_common.h create mode 100644 c_api/fastdeploy_capi/fd_type.h create mode 100644 c_api/fastdeploy_capi/runtime_option.cc create mode 100644 c_api/fastdeploy_capi/runtime_option.h create mode 100644 c_api/fastdeploy_capi/types_internal.cc create mode 100644 c_api/fastdeploy_capi/types_internal.h create mode 100644 c_api/fastdeploy_capi/vision/classification/ppcls/model.cc create mode 100644 c_api/fastdeploy_capi/vision/classification/ppcls/model.h create mode 100644 c_api/fastdeploy_capi/vision/detection/ppdet/model.cc create mode 100644 c_api/fastdeploy_capi/vision/detection/ppdet/model.h create mode 100644 c_api/fastdeploy_capi/vision/result.cc create mode 100644 c_api/fastdeploy_capi/vision/result.h create mode 100644 c_api/fastdeploy_capi/vision/visualize.cc create mode 100644 
c_api/fastdeploy_capi/vision/visualize.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1180fce3b..51ba10c83 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,7 @@ option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF) option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF) option(WITH_TESTING "Whether to compile with unittest." OFF) +option(WITH_CAPI "Whether to compile with c api." OFF) ############################# Options for Android cross compiling ######################### if(ANDROID) @@ -416,6 +417,14 @@ if(ENABLE_PADDLE2ONNX) list(APPEND DEPEND_LIBS external_paddle2onnx) endif(ENABLE_PADDLE2ONNX) +if(WITH_CAPI) + include(${PROJECT_SOURCE_DIR}/c_api/CMakeLists.txt) + if(MSVC) + add_definitions(-DFD_CAPI) + endif() +endif() + + configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY) configure_file(${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py) configure_file(${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py.in ${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py) diff --git a/c_api/CMakeLists.txt b/c_api/CMakeLists.txt new file mode 100644 index 000000000..7c7a16626 --- /dev/null +++ b/c_api/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +##################################### Building: FastDeploy C API ####################################### +message("----start--CAPI-------") + +if(NOT WITH_CAPI) + return() +endif() + +file(GLOB_RECURSE DEPLOY_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/*.cc) +if(NOT ENABLE_VISION) + file(GLOB_RECURSE DEPLOY_VISION_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/vision/*.cc) + list(REMOVE_ITEM DEPLOY_CAPI_SRCS ${DEPLOY_VISION_CAPI_SRCS}) +endif() +list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_CAPI_SRCS}) +include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api) diff --git a/c_api/fastdeploy_capi/fd_common.h b/c_api/fastdeploy_capi/fd_common.h new file mode 100644 index 000000000..6374cf9b5 --- /dev/null +++ b/c_api/fastdeploy_capi/fd_common.h @@ -0,0 +1,100 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
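+// A note on the FD_ENUM macro defined further down in this header: it
+// typedefs the enum name to int32_t and leaves the enumerators as plain
+// integer constants, so every enum keeps a fixed 32-bit representation
+// across the C ABI boundary. For example,
+//   FD_ENUM(FD_C_ModelFormat){ AUTOREC, PADDLE, ... };
+// expands to
+//   typedef int32_t FD_C_ModelFormat; enum { AUTOREC, PADDLE, ... };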
+ +#pragma once + +#include +#include + +#if defined(_WIN32) +#ifdef FD_CAPI +#define FASTDEPLOY_CAPI_EXPORT __declspec(dllexport) +#else +#define FASTDEPLOY_CAPI_EXPORT __declspec(dllimport) +#endif // FD_CAPI +#else +#define FASTDEPLOY_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 + +/// +/// __fd_give means that a new object is returned. The user should make sure +/// that the returned pointer is used exactly once as a value for an __fd_take +/// argument. In between, it can be used as a value for as many __fd_keep +/// arguments as the user likes. +/// +#ifndef __fd_give +#define __fd_give +#endif +/// +/// __fd_take means that the object the argument points to is taken over by the +/// function and may no longer be used by the user as an argument to any other +/// function. The pointer value must be one returned by a function returning an +/// __fd_give pointer. +/// +#ifndef __fd_take +#define __fd_take +#endif +/// +/// __fd_keep means that the function will only use the object temporarily. The +/// object which the argument points to is not taken over by the function. After +/// the function has finished, the user can still use it as an argument to other +/// functions. +/// +#ifndef __fd_keep +#define __fd_keep +#endif + +typedef int8_t FD_C_Bool; +#define TRUE 1 +#define FALSE 0 + +#define FD_ENUM(type) \ + typedef int32_t type; \ + enum + +FD_ENUM(FD_C_ModelFormat){ + AUTOREC, ///< Auto recognize the model format by model file name + PADDLE, ///< Model with paddlepaddle format + ONNX, ///< Model with ONNX format + RKNN, ///< Model with RKNN format + TORCHSCRIPT, ///< Model with TorchScript format + SOPHGO, ///< Model with SOPHGO format +}; + +FD_ENUM(FD_C_rknpu2_CpuName){ + RK356X = 0, /* run on RK356X. */ + RK3588 = 1, /* default,run on RK3588. */ + UNDEFINED, +}; + +FD_ENUM(FD_C_rknpu2_CoreMask){ + RKNN_NPU_CORE_AUTO = 0, //< default, run on NPU core randomly. + RKNN_NPU_CORE_0 = 1, //< run on NPU core 0. + RKNN_NPU_CORE_1 = 2, //< run on NPU core 1. + RKNN_NPU_CORE_2 = 4, //< run on NPU core 2. + RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 | + RKNN_NPU_CORE_1, //< run on NPU core 1 and core 2. + RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 | + RKNN_NPU_CORE_2, //< run on NPU core 1 and core 2. + RKNN_NPU_CORE_UNDEFINED, +}; + +FD_ENUM(FD_C_LitePowerMode){ + LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode + LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode + LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode + LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode + LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode + LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode +}; diff --git a/c_api/fastdeploy_capi/fd_type.h b/c_api/fastdeploy_capi/fd_type.h new file mode 100644 index 000000000..75daf9db6 --- /dev/null +++ b/c_api/fastdeploy_capi/fd_type.h @@ -0,0 +1,67 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "fastdeploy_capi/fd_common.h" // NOLINT + +typedef struct FD_C_OneDimArrayUint8 { + size_t size; + uint8_t* data; +} FD_C_OneDimArrayUint8; // std::vector + +typedef struct FD_C_OneDimArrayInt32 { + size_t size; + int32_t* data; +} FD_C_OneDimArrayInt32; // std::vector + +typedef struct FD_C_OneDimArraySize { + size_t size; + size_t* data; +} FD_C_OneDimArraySize; // std::vector + +typedef struct FD_C_OneDimArrayInt64 { + size_t size; + int64_t* data; +} FD_C_OneDimArrayInt64; // std::vector + +typedef struct FD_C_OneDimArrayFloat { + size_t size; + float* data; +} FD_C_OneDimArrayFloat; // std::vector + +typedef struct FD_C_Cstr { + size_t size; + char* data; +} FD_C_Cstr; // std::string + +typedef struct FD_C_OneDimArrayCstr { + size_t size; + FD_C_Cstr* data; +} FD_C_OneDimArrayCstr; // std::vector + +typedef struct FD_C_TwoDimArraySize { + size_t size; + FD_C_OneDimArraySize* data; +} FD_C_TwoDimArraySize; // std::vector> + +typedef struct FD_C_TwoDimArrayFloat { + size_t size; + FD_C_OneDimArrayFloat* data; +} FD_C_TwoDimArrayFloat; // std::vector> + +typedef void* FD_C_Mat; diff --git a/c_api/fastdeploy_capi/runtime_option.cc b/c_api/fastdeploy_capi/runtime_option.cc new file mode 100644 index 000000000..3c9b4022d --- /dev/null +++ b/c_api/fastdeploy_capi/runtime_option.cc @@ -0,0 +1,418 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
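+// This file implements the C bindings declared in
+// fastdeploy_capi/runtime_option.h: each FD_C_RuntimeOptionWrapper* function
+// unwraps the opaque handle and forwards to the matching member of
+// fastdeploy::RuntimeOption, so the C surface mirrors the C++ API
+// one-to-one.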
+ +#include "fastdeploy_capi/runtime_option.h" + +#include "fastdeploy/utils/utils.h" +#include "fastdeploy_capi/types_internal.h" + +extern "C" { + +FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() { + FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper = + new FD_C_RuntimeOptionWrapper(); + fd_c_runtime_option_wrapper->runtime_option = + std::unique_ptr( + new fastdeploy::RuntimeOption()); + return fd_c_runtime_option_wrapper; +} + +void FD_C_DestroyRuntimeOption( + __fd_take FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + delete fd_c_runtime_option_wrapper; +} + +void FD_C_RuntimeOptionWrapperSetModelPath( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* model_path, const char* params_path, + const FD_C_ModelFormat format) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetModelPath(std::string(model_path), + std::string(params_path), + static_cast(format)); +} + +void FD_C_RuntimeOptionWrapperSetModelBuffer( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* model_buffer, const char* params_buffer, + const FD_C_ModelFormat format) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetModelBuffer(model_buffer, params_buffer, + static_cast(format)); +} + +void FD_C_RuntimeOptionWrapperUseCpu( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseCpu(); +} + +void FD_C_RuntimeOptionWrapperUseGpu( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int gpu_id) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseGpu(gpu_id); +} + +void FD_C_RuntimeOptionWrapperUseRKNPU2( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + FD_C_rknpu2_CpuName rknpu2_name, FD_C_rknpu2_CoreMask rknpu2_core) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseRKNPU2( + static_cast(rknpu2_name), + static_cast(rknpu2_core)); +} + +void FD_C_RuntimeOptionWrapperUseTimVX( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseTimVX(); +} + +void FD_C_RuntimeOptionWrapperUseAscend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseAscend(); +} + +void FD_C_RuntimeOptionWrapperUseKunlunXin( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked, + FD_C_Bool autotune, const char* autotune_file, const char* precision, + FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseKunlunXin(kunlunxin_id, l3_workspace_size, bool(locked), + bool(autotune), std::string(autotune_file), + std::string(precision), bool(adaptive_seqlen), + bool(enable_multi_stream)); +} + +void FD_C_RuntimeOptionWrapperUseSophgo( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& 
runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseSophgo(); +} + +void FD_C_RuntimeOptionWrapperSetExternalStream( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + void* external_stream) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetExternalStream(external_stream); +} + +void FD_C_RuntimeOptionWrapperSetCpuThreadNum( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int thread_num) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetCpuThreadNum(thread_num); +} + +void FD_C_RuntimeOptionWrapperSetOrtGraphOptLevel( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int level) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetOrtGraphOptLevel(level); +} + +void FD_C_RuntimeOptionWrapperUsePaddleBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UsePaddleBackend(); +} + +void FD_C_RuntimeOptionWrapperUsePaddleInferBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + return FD_C_RuntimeOptionWrapperUsePaddleBackend(fd_c_runtime_option_wrapper); +} + +void FD_C_RuntimeOptionWrapperUseOrtBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseOrtBackend(); +} + +void FD_C_RuntimeOptionWrapperUseSophgoBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseSophgoBackend(); +} + +void FD_C_RuntimeOptionWrapperUseTrtBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseTrtBackend(); +} + +void FD_C_RuntimeOptionWrapperUsePorosBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UsePorosBackend(); +} + +void FD_C_RuntimeOptionWrapperUseOpenVINOBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseOpenVINOBackend(); +} + +void FD_C_RuntimeOptionWrapperUseLiteBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseLiteBackend(); +} + +void FD_C_RuntimeOptionWrapperUsePaddleLiteBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + return FD_C_RuntimeOptionWrapperUseLiteBackend(fd_c_runtime_option_wrapper); +} + +void FD_C_RuntimeOptionWrapperSetPaddleMKLDNN( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + FD_C_Bool pd_mkldnn) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + 
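// CHECK_AND_CONVERT_FD_TYPE (from the fastdeploy_capi/types_internal.h
+  // header included above) checks the opaque C handle and returns the
+  // fastdeploy::RuntimeOption it owns; every FD_C_* wrapper in this file is
+  // this same thin forwarding shim around the corresponding C++ member.
+ 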
runtime_option->SetPaddleMKLDNN(pd_mkldnn); +} + +void FD_C_RuntimeOptionWrapperEnablePaddleToTrt( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->EnablePaddleToTrt(); +} + +void FD_C_RuntimeOptionWrapperDeletePaddleBackendPass( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* delete_pass_name) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->DeletePaddleBackendPass(std::string(delete_pass_name)); +} + +void FD_C_RuntimeOptionWrapperEnablePaddleLogInfo( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->EnablePaddleLogInfo(); +} + +void FD_C_RuntimeOptionWrapperDisablePaddleLogInfo( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->DisablePaddleLogInfo(); +} + +void FD_C_RuntimeOptionWrapperSetPaddleMKLDNNCacheSize( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int size) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetPaddleMKLDNNCacheSize(size); +} + +void FD_C_RuntimeOptionWrapperSetOpenVINODevice( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* name) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetOpenVINODevice(std::string(name)); +} + +void FD_C_RuntimeOptionWrapperSetLiteOptimizedModelDir( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* optimized_model_dir) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetLiteOptimizedModelDir(std::string(optimized_model_dir)); +} + +void FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionPath( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* nnadapter_subgraph_partition_config_path) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetLiteSubgraphPartitionPath( + std::string(nnadapter_subgraph_partition_config_path)); +} + +void FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionConfigBuffer( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* nnadapter_subgraph_partition_config_buffer) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetLiteSubgraphPartitionConfigBuffer( + std::string(nnadapter_subgraph_partition_config_buffer)); +} + +void FD_C_RuntimeOptionWrapperSetLiteContextProperties( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* nnadapter_context_properties) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetLiteContextProperties( + std::string(nnadapter_context_properties)); +} + +void FD_C_RuntimeOptionWrapperSetLiteModelCacheDir( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* nnadapter_model_cache_dir) { + auto& runtime_option = 
CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->SetLiteModelCacheDir(std::string(nnadapter_model_cache_dir));
+}
+
+void FD_C_RuntimeOptionWrapperSetLiteMixedPrecisionQuantizationConfigPath(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* nnadapter_mixed_precision_quantization_config_path) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->SetLiteMixedPrecisionQuantizationConfigPath(
+      std::string(nnadapter_mixed_precision_quantization_config_path));
+}
+
+void FD_C_RuntimeOptionWrapperEnableLiteFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->EnableLiteFP16();
+}
+
+void FD_C_RuntimeOptionWrapperDisableLiteFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->DisableLiteFP16();
+}
+
+void FD_C_RuntimeOptionWrapperEnableLiteInt8(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->EnableLiteInt8();
+}
+
+void FD_C_RuntimeOptionWrapperDisableLiteInt8(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->DisableLiteInt8();
+}
+
+void FD_C_RuntimeOptionWrapperSetLitePowerMode(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    FD_C_LitePowerMode mode) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->SetLitePowerMode(
+      static_cast<fastdeploy::LitePowerMode>(mode));
+}
+
+void FD_C_RuntimeOptionWrapperEnableTrtFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->EnableTrtFP16();
+}
+
+void FD_C_RuntimeOptionWrapperDisableTrtFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->DisableTrtFP16();
+}
+
+void FD_C_RuntimeOptionWrapperSetTrtCacheFile(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* cache_file_path) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->SetTrtCacheFile(std::string(cache_file_path));
+}
+
+void FD_C_RuntimeOptionWrapperEnablePinnedMemory(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->EnablePinnedMemory();
+}
+
+void FD_C_RuntimeOptionWrapperDisablePinnedMemory(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->DisablePinnedMemory();
+}
+
+void FD_C_RuntimeOptionWrapperEnablePaddleTrtCollectShape(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  runtime_option->EnablePaddleTrtCollectShape();
+}
+
+void
FD_C_RuntimeOptionWrapperDisablePaddleTrtCollectShape( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->DisablePaddleTrtCollectShape(); +} + +void FD_C_RuntimeOptionWrapperSetOpenVINOStreams( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int num_streams) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetOpenVINOStreams(num_streams); +} + +void FD_C_RuntimeOptionWrapperUseIpu( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int device_num, int micro_batch_size, FD_C_Bool enable_pipelining, + int batches_per_step) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->UseIpu(device_num, micro_batch_size, enable_pipelining, + batches_per_step); +} + +void FD_C_RuntimeOptionWrapperSetIpuConfig( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion, + FD_C_Bool enable_half_partial) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + runtime_option->SetIpuConfig(enable_fp16, replica_num, + available_memory_proportion, + enable_half_partial); +} + +} // extern "C" diff --git a/c_api/fastdeploy_capi/runtime_option.h b/c_api/fastdeploy_capi/runtime_option.h new file mode 100644 index 000000000..cfc087473 --- /dev/null +++ b/c_api/fastdeploy_capi/runtime_option.h @@ -0,0 +1,517 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
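+// A minimal usage sketch of the API declared below (paths are placeholder
+// values, error handling omitted; the PADDLE enumerator comes from
+// fd_common.h):
+//
+//   FD_C_RuntimeOptionWrapper* opt = FD_C_CreateRuntimeOptionWrapper();
+//   FD_C_RuntimeOptionWrapperSetModelPath(opt, "model.pdmodel",
+//                                         "model.pdiparams", PADDLE);
+//   FD_C_RuntimeOptionWrapperUseCpu(opt);
+//   FD_C_RuntimeOptionWrapperSetCpuThreadNum(opt, 4);
+//   ... pass opt when creating a model, then release it:
+//   FD_C_DestroyRuntimeOptionWrapper(opt);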
+ +#pragma once + +#include + +#include "fastdeploy_capi/fd_common.h" + +typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper; + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief Create a new FD_C_RuntimeOptionWrapper object + * + * \return Return a pointer to FD_C_RuntimeOptionWrapper object + */ + +FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_RuntimeOptionWrapper* +FD_C_CreateRuntimeOptionWrapper(); + +/** \brief Destroy a FD_C_RuntimeOptionWrapper object + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ + +FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyRuntimeOptionWrapper( + __fd_take FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper); + +/** \brief Set path of model file and parameter file + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model + * \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams + * \param[in] format Format of the loaded model + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetModelPath( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* model_path, const char* params_path, + const FD_C_ModelFormat format); + +/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + * \param[in] model_buffer The memory buffer of model + * \param[in] params_buffer The memory buffer of the combined parameters file + * \param[in] format Format of the loaded model + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetModelBuffer( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const char* model_buffer, const char* params_buffer, + const FD_C_ModelFormat); + +/** \brief Use cpu to inference, the runtime will inference on CPU by default + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseCpu( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper); + +/** \brief Use Nvidia GPU to inference + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseGpu( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int gpu_id); + +/** \brief Use RKNPU2 to inference + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + * \param[in] rknpu2_name CpuName enum value + * \param[in] rknpu2_core CoreMask enum value + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseRKNPU2( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + FD_C_rknpu2_CpuName rknpu2_name, FD_C_rknpu2_CoreMask rknpu2_core); + +/** \brief Use TimVX to inference + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseTimVX( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper); + +/** \brief Use Huawei Ascend to inference + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ +FASTDEPLOY_CAPI_EXPORT extern void 
FD_C_RuntimeOptionWrapperUseAscend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper); + +/// +/// \brief Turn on KunlunXin XPU. +/// +/// \param[in] fd_c_runtime_option_wrapper pointer to \ + FD_C_RuntimeOptionWrapper object +/// \param[in] kunlunxin_id the KunlunXin XPU card to use\ + (default is 0). +/// \param[in] l3_workspace_size The size of the video memory allocated\ +/// by the l3 cache, the maximum is 16M. +/// \param[in] locked Whether the allocated L3 cache can be locked. If false, +/// it means that the L3 cache is not locked, and the allocated L3 +/// cache can be shared by multiple models, and multiple models +/// sharing the L3 cache will be executed sequentially on the card. +/// \param[in] autotune Whether to autotune the conv operator in the model. If +/// true, when the conv operator of a certain dimension is executed +/// for the first time, it will automatically search for a better +/// algorithm to improve the performance of subsequent conv operators +/// of the same dimension. +/// \param[in] autotune_file Specify the path of the autotune file. If +/// autotune_file is specified, the algorithm specified in the +/// file will be used and autotune will not be performed again. +/// \param[in] precision Calculation accuracy of multi_encoder +/// \param[in] adaptive_seqlen Is the input of multi_encoder variable length +/// \param[in] enable_multi_stream Whether to enable the multi stream of +/// KunlunXin XPU. +/// +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseKunlunXin( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked, + FD_C_Bool autotune, const char* autotune_file, const char* precision, + FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream); + +/** Use Sophgo to inference + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseSophgo( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper); + +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetExternalStream( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + void* external_stream); + +/** + * @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + * \param[in] thread_num number of threads + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetCpuThreadNum( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int thread_num); + +/** + * @brief Set ORT graph opt level, default is decide by ONNX Runtime itself + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + * \param[in] level optimization level + */ +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetOrtGraphOptLevel( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int level); + +/** + * @brief Set Paddle Inference as inference backend, support CPU/GPU + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ + +FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUsePaddleBackend( + __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper); + +/** + * @brief Wrapper function of UsePaddleBackend() + * + * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object + */ 
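+// (FD_C_RuntimeOptionWrapperUsePaddleBackend above and
+// FD_C_RuntimeOptionWrapperUsePaddleInferBackend below select the same
+// backend; the *PaddleInferBackend name matches the C++ API, where the old
+// UsePaddleBackend spelling is slated for removal in v1.2.0.)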
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperUsePaddleInferBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set ONNX Runtime as inference backend, support CPU/GPU
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseOrtBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set SOPHGO Runtime as inference backend, only support Sophgo TPU
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseSophgoBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set TensorRT as inference backend, only support GPU
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseTrtBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set Poros backend as inference backend, support CPU/GPU
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUsePorosBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set OpenVINO as inference backend, only support CPU
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseOpenVINOBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set Paddle Lite as inference backend, only support Arm CPU
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseLiteBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Wrapper function of UseLiteBackend()
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperUsePaddleLiteBackend(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set the MKLDNN switch while using Paddle Inference as inference backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] pd_mkldnn whether to use MKLDNN
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetPaddleMKLDNN(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    FD_C_Bool pd_mkldnn);
+
+/**
+ * @brief If the TensorRT backend is selected, EnablePaddleToTrt switches to the Paddle Inference backend and uses its integrated TensorRT instead.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnablePaddleToTrt(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Delete a pass by name while using Paddle Inference as inference backend; this can be called multiple times to delete a set of passes
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] delete_pass_name pass name
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperDeletePaddleBackendPass(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* delete_pass_name);
+
+/**
+ * @brief Enable printing debug information while using Paddle Inference as inference backend; the backend disables debug information by default
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnablePaddleLogInfo(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Disable printing debug information while using Paddle Inference as inference backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperDisablePaddleLogInfo(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set the shape cache size while using Paddle Inference with MKLDNN; by default it caches all the different shapes
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] size cache size
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetPaddleMKLDNNCacheSize(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, int size);
+
+/**
+ * @brief Set the device name for OpenVINO; default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'...
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] name device name
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetOpenVINODevice(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* name);
+
+/**
+ * @brief Set the optimized model dir for Paddle Lite backend.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] optimized_model_dir optimized model dir
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetLiteOptimizedModelDir(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* optimized_model_dir);
+
+/**
+ * @brief Set the subgraph partition config path for Paddle Lite backend.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] nnadapter_subgraph_partition_config_path subgraph partition config path
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionPath(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* nnadapter_subgraph_partition_config_path);
+
+/**
+ * @brief Set the subgraph partition config buffer for Paddle Lite backend.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] nnadapter_subgraph_partition_config_buffer subgraph partition config buffer
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionConfigBuffer(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* nnadapter_subgraph_partition_config_buffer);
+
+/**
+ * @brief Set context properties for Paddle Lite backend.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] nnadapter_context_properties context properties
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetLiteContextProperties(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* nnadapter_context_properties);
+
+/**
+ * @brief Set the model cache dir for Paddle Lite backend.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] nnadapter_model_cache_dir model cache dir
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetLiteModelCacheDir(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* nnadapter_model_cache_dir);
+
+/**
+ * @brief Set the mixed precision quantization config path for Paddle Lite backend.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] nnadapter_mixed_precision_quantization_config_path mixed precision quantization config path
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetLiteMixedPrecisionQuantizationConfigPath(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* nnadapter_mixed_precision_quantization_config_path);
+
+/**
+ * @brief Enable half precision (FP16) inference while using the Paddle Lite backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnableLiteFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Disable half precision; change back to full precision (float32)
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisableLiteFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Enable int8 precision inference while using the Paddle Lite backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnableLiteInt8(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Disable int8 precision; change back to full precision (float32)
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisableLiteInt8(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set the power mode while using Paddle Lite as inference backend; mode: 0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND; 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW. Refer to [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] mode power mode
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperSetLitePowerMode(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    FD_C_LitePowerMode mode);
+
+/**
+ * @brief Enable FP16 inference while using the TensorRT backend. Notice: not all GPU devices support FP16; on devices that don't, FastDeploy will fall back to FP32 automatically
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnableTrtFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Disable FP16 inference while using the TensorRT backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisableTrtFP16(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set the cache file path while using the TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT takes a long time; with this interface the TensorRT engine is saved to `cache_file_path` and loaded directly the next time the code runs
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] cache_file_path cache file path
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetTrtCacheFile(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const char* cache_file_path);
+
+/**
+ * @brief Enable pinned memory. Pinned memory can be utilized to speed up the data transfer between CPU and GPU. Currently it's only supported in the TRT backend and the Paddle Inference backend.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnablePinnedMemory(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Disable pinned memory
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisablePinnedMemory(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Enable shape collection in the Paddle-TRT backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperEnablePaddleTrtCollectShape(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Disable shape collection in the Paddle-TRT backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ */
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_RuntimeOptionWrapperDisablePaddleTrtCollectShape(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+
+/**
+ * @brief Set the number of streams for the OpenVINO backend
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] num_streams number of streams
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetOpenVINOStreams(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    int num_streams);
+
+/**
+ * @brief Use Graphcore IPU to run inference.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] device_num the number of IPUs.
+ * \param[in] micro_batch_size the batch size in the graph; only works when the graph has no batch shape info.
+ * \param[in] enable_pipelining enable pipelining.
+ * \param[in] batches_per_step the number of batches per run in pipelining.
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseIpu(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    int device_num, int micro_batch_size, FD_C_Bool enable_pipelining,
+    int batches_per_step);
+
+/** \brief Set IPU config.
+ *
+ * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
+ * \param[in] enable_fp16 enable fp16.
+ * \param[in] replica_num the number of graph replications.
+ * \param[in] available_memory_proportion the available memory proportion for matmul/conv.
+ * \param[in] enable_half_partial enable fp16 partial for matmul; only works with fp16.
+ */
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetIpuConfig(
+    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion,
+    FD_C_Bool enable_half_partial);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
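For orientation, a minimal sketch of how the RuntimeOption C API above is meant to compose. This is not part of the patch: the `FD_C_ModelFormat_PADDLE` enumerator name and the model paths are assumptions (the enum lives in fd_type.h/fd_common.h, which this hunk does not show); only functions declared in the header above are used.

    // Sketch: configure a RuntimeOption through the C API (assumptions noted above).
    #include "fastdeploy_capi/runtime_option.h"

    int main() {
      FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
      FD_C_RuntimeOptionWrapperSetModelPath(option, "model_dir/model.pdmodel",
                                            "model_dir/model.pdiparams",
                                            FD_C_ModelFormat_PADDLE);  // enumerator name assumed
      FD_C_RuntimeOptionWrapperUseCpu(option);
      FD_C_RuntimeOptionWrapperSetCpuThreadNum(option, 4);
      FD_C_RuntimeOptionWrapperUsePaddleInferBackend(option);
      // ...hand `option` to a model constructor, then release it:
      FD_C_DestroyRuntimeOptionWrapper(option);
      return 0;
    }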
diff --git a/c_api/fastdeploy_capi/types_internal.cc b/c_api/fastdeploy_capi/types_internal.cc
new file mode 100644
index 000000000..807f5dd21
--- /dev/null
+++ b/c_api/fastdeploy_capi/types_internal.cc
@@ -0,0 +1,63 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy_capi/types_internal.h"
+
+namespace fastdeploy {
+
+#ifdef ENABLE_VISION
+
+std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>&
+FD_C_CheckAndConvertPaddleClasModelWrapper(
+    FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) {
+  FDASSERT(
+      fd_c_paddleclas_model_wrapper != nullptr,
+      "The pointer of fd_c_paddleclas_model_wrapper shouldn't be nullptr.");
+  return fd_c_paddleclas_model_wrapper->paddleclas_model;
+}
+
+std::unique_ptr<fastdeploy::vision::detection::PPYOLOE>&
+FD_C_CheckAndConvertPPYOLOEWrapper(FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper) {
+  FDASSERT(fd_c_ppyoloe_wrapper != nullptr,
+           "The pointer of fd_c_ppyoloe_wrapper shouldn't be nullptr.");
+  return fd_c_ppyoloe_wrapper->ppyoloe_model;
+}
+
+std::unique_ptr<fastdeploy::vision::ClassifyResult>&
+FD_C_CheckAndConvertClassifyResultWrapper(
+    FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
+  FDASSERT(fd_c_classify_result_wrapper != nullptr,
+           "The pointer of fd_c_classify_result_wrapper shouldn't be nullptr.");
+  return fd_c_classify_result_wrapper->classify_result;
+}
+
+std::unique_ptr<fastdeploy::vision::DetectionResult>&
+FD_C_CheckAndConvertDetectionResultWrapper(
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
+  FDASSERT(
+      fd_c_detection_result_wrapper != nullptr,
+      "The pointer of fd_c_detection_result_wrapper shouldn't be nullptr.");
+  return fd_c_detection_result_wrapper->detection_result;
+}
+#endif
+
+std::unique_ptr<fastdeploy::RuntimeOption>&
+FD_C_CheckAndConvertRuntimeOptionWrapper(
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
+  FDASSERT(fd_c_runtime_option_wrapper != nullptr,
+           "The pointer of fd_c_runtime_option_wrapper shouldn't be nullptr.");
+  return fd_c_runtime_option_wrapper->runtime_option;
+}
+
+} // namespace fastdeploy
\ No newline at end of file
diff --git a/c_api/fastdeploy_capi/types_internal.h b/c_api/fastdeploy_capi/types_internal.h
new file mode 100644
index 000000000..f8a2cfbe9
--- /dev/null
+++ b/c_api/fastdeploy_capi/types_internal.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/runtime/runtime_option.h"
+#include "fastdeploy_capi/fd_type.h"
+#include <memory>
+
+#ifdef ENABLE_VISION
+#include "fastdeploy/vision/classification/ppcls/model.h"
+#include "fastdeploy/vision/common/result.h"
+#include "fastdeploy/vision/detection/ppdet/model.h"
+
+typedef struct FD_C_ClassifyResultWrapper {
+  std::unique_ptr<fastdeploy::vision::ClassifyResult> classify_result;
+} FD_C_ClassifyResultWrapper;
+
+typedef struct FD_C_DetectionResultWrapper {
+  std::unique_ptr<fastdeploy::vision::DetectionResult> detection_result;
+} FD_C_DetectionResultWrapper;
+
+typedef struct FD_C_PaddleClasModelWrapper {
+  std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>
+      paddleclas_model;
+} FD_C_PaddleClasModelWrapper;
+
+typedef struct FD_C_PPYOLOEWrapper {
+  std::unique_ptr<fastdeploy::vision::detection::PPYOLOE> ppyoloe_model;
+} FD_C_PPYOLOEWrapper;
+
+namespace fastdeploy {
+std::unique_ptr<fastdeploy::vision::ClassifyResult>&
+FD_C_CheckAndConvertClassifyResultWrapper(
+    FD_C_ClassifyResultWrapper* fd_classify_result_wrapper);
+std::unique_ptr<fastdeploy::vision::DetectionResult>&
+FD_C_CheckAndConvertDetectionResultWrapper(
+    FD_C_DetectionResultWrapper* fd_detection_result_wrapper);
+std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>&
+FD_C_CheckAndConvertPaddleClasModelWrapper(
+    FD_C_PaddleClasModelWrapper* fd_paddleclas_model_wrapper);
+std::unique_ptr<fastdeploy::vision::detection::PPYOLOE>&
+FD_C_CheckAndConvertPPYOLOEWrapper(FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper);
+} // namespace fastdeploy
+
+#endif
+
+typedef struct FD_C_RuntimeOptionWrapper {
+  std::unique_ptr<fastdeploy::RuntimeOption> runtime_option;
+} FD_C_RuntimeOptionWrapper;
+
+namespace fastdeploy {
+std::unique_ptr<fastdeploy::RuntimeOption>&
+FD_C_CheckAndConvertRuntimeOptionWrapper(
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
+}
+
+#define CHECK_AND_CONVERT_FD_TYPE(TYPENAME, variable_name) \
+  fastdeploy::FD_C_CheckAndConvert##TYPENAME(variable_name)
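The pattern in this header is uniform: every C-visible handle is a struct owning its C++ object through a std::unique_ptr, and CHECK_AND_CONVERT_FD_TYPE token-pastes the type name onto the matching checked accessor. Concretely, a call site like the one in this patch expands as shown below:

    // At a call site:
    auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
                                                     fd_c_runtime_option_wrapper);
    // ...the preprocessor pastes the type name, yielding:
    auto& runtime_option = fastdeploy::FD_C_CheckAndConvertRuntimeOptionWrapper(
        fd_c_runtime_option_wrapper);

The accessor FDASSERTs against null and returns a reference to the owning unique_ptr, so the implementation files can use `->` and `.get()` directly on the result.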
diff --git a/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc
new file mode 100644
index 000000000..3ed62f26a
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc
@@ -0,0 +1,53 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy_capi/vision/classification/ppcls/model.h"
+
+#include "fastdeploy_capi/types_internal.h"
+
+extern "C" {
+
+FD_C_PaddleClasModelWrapper* FD_C_CreatePaddleClasModelWrapper(
+    const char* model_file, const char* params_file, const char* config_file,
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const FD_C_ModelFormat model_format) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper =
+      new FD_C_PaddleClasModelWrapper();
+  fd_c_paddleclas_model_wrapper->paddleclas_model =
+      std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>(
+          new fastdeploy::vision::classification::PaddleClasModel(
+              std::string(model_file), std::string(params_file),
+              std::string(config_file), *runtime_option,
+              static_cast<fastdeploy::ModelFormat>(model_format)));
+  return fd_c_paddleclas_model_wrapper;
+}
+
+void FD_C_DestroyPaddleClasModelWrapper(
+    __fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) {
+  delete fd_c_paddleclas_model_wrapper;
+}
+
+FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
+    __fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper,
+    FD_C_Mat img, FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
+  cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
+  auto& paddleclas_model = CHECK_AND_CONVERT_FD_TYPE(
+      PaddleClasModelWrapper, fd_c_paddleclas_model_wrapper);
+  auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
+      ClassifyResultWrapper, fd_c_classify_result_wrapper);
+  return paddleclas_model->Predict(im, classify_result.get());
+}
+}
\ No newline at end of file
diff --git a/c_api/fastdeploy_capi/vision/classification/ppcls/model.h b/c_api/fastdeploy_capi/vision/classification/ppcls/model.h
new file mode 100644
index 000000000..db117e605
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/classification/ppcls/model.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy_capi/fd_common.h"
+#include "fastdeploy_capi/fd_type.h"
+#include "fastdeploy_capi/runtime_option.h"
+#include "fastdeploy_capi/vision/result.h"
+
+typedef struct FD_C_PaddleClasModelWrapper FD_C_PaddleClasModelWrapper;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new FD_C_PaddleClasModelWrapper object
+ *
+ * \param[in] model_file Path of the model file, e.g. resnet/model.pdmodel
+ * \param[in] params_file Path of the parameter file, e.g. resnet/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+ * \param[in] config_file Path of the configuration file for deployment, e.g. resnet/infer_cfg.yml
+ * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference; the default will use CPU and choose the backend defined in `valid_cpu_backends`
+ * \param[in] model_format Model format of the loaded model; default is Paddle format
+ *
+ * \return Return a pointer to FD_C_PaddleClasModelWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PaddleClasModelWrapper*
+FD_C_CreatePaddleClasModelWrapper(
+    const char* model_file, const char* params_file, const char* config_file,
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const FD_C_ModelFormat model_format);
+
+/** \brief Destroy a FD_C_PaddleClasModelWrapper object
+ *
+ * \param[in] fd_c_paddleclas_model_wrapper pointer to FD_C_PaddleClasModelWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyPaddleClasModelWrapper(
+    __fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper);
+
+/** \brief Predict the classification result for an input image
+ *
+ * \param[in] fd_c_paddleclas_model_wrapper pointer to FD_C_PaddleClasModelWrapper object
+ * \param[in] img pointer to cv::Mat image
+ * \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object, which stores the result.
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
+    __fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper,
+    FD_C_Mat img, FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
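Putting the pieces together, here is a sketch of the intended call sequence for this classification API, written in C++ so cv::imread can stand in for image loading. It is not part of the patch: FD_C_Mat is treated as an opaque pointer to cv::Mat (as the reinterpret_cast in model.cc implies), the result helpers come from vision/result.h later in this patch, the `FD_C_ModelFormat_PADDLE` enumerator name and the paths are assumptions:

    // Sketch: classify one image through the C API (assumptions noted above).
    #include <cstdio>
    #include <opencv2/opencv.hpp>
    #include "fastdeploy_capi/vision/classification/ppcls/model.h"
    #include "fastdeploy_capi/vision/result.h"

    int main() {
      FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
      FD_C_RuntimeOptionWrapperUseCpu(option);
      FD_C_PaddleClasModelWrapper* model = FD_C_CreatePaddleClasModelWrapper(
          "resnet/model.pdmodel", "resnet/model.pdiparams", "resnet/infer_cfg.yml",
          option, FD_C_ModelFormat_PADDLE);  // enumerator name assumed
      cv::Mat image = cv::imread("test.jpg");
      FD_C_ClassifyResultWrapper* result = FD_C_CreateClassifyResultWrapper();
      if (FD_C_PaddleClasModelWrapperPredict(
              model, reinterpret_cast<FD_C_Mat>(&image), result)) {
        FD_C_ClassifyResult* data = FD_C_ClassifyResultWrapperGetData(result);
        std::printf("top-1: label %d, score %.4f\n", data->label_ids.data[0],
                    data->scores.data[0]);
        FD_C_DestroyClassifyResult(data);  // GetData returns an owned deep copy
      }
      FD_C_DestroyClassifyResultWrapper(result);
      FD_C_DestroyPaddleClasModelWrapper(model);
      FD_C_DestroyRuntimeOptionWrapper(option);
      return 0;
    }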
diff --git a/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc
new file mode 100644
index 000000000..17a87ec8b
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc
@@ -0,0 +1,53 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy_capi/vision/detection/ppdet/model.h"
+
+#include "fastdeploy_capi/types_internal.h"
+#include "fastdeploy_capi/vision/visualize.h"
+
+extern "C" {
+
+FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
+    const char* model_file, const char* params_file, const char* config_file,
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const FD_C_ModelFormat model_format) {
+  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
+                                                   fd_c_runtime_option_wrapper);
+  FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper = new FD_C_PPYOLOEWrapper();
+  fd_c_ppyoloe_wrapper->ppyoloe_model =
+      std::unique_ptr<fastdeploy::vision::detection::PPYOLOE>(
+          new fastdeploy::vision::detection::PPYOLOE(
+              std::string(model_file), std::string(params_file),
+              std::string(config_file), *runtime_option,
+              static_cast<fastdeploy::ModelFormat>(model_format)));
+  return fd_c_ppyoloe_wrapper;
+}
+
+void FD_C_DestroyPPYOLOEWrapper(
+    __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper) {
+  delete fd_c_ppyoloe_wrapper;
+}
+
+FD_C_Bool FD_C_PPYOLOEWrapperPredict(
+    FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
+  cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
+  auto& ppyoloe_model =
+      CHECK_AND_CONVERT_FD_TYPE(PPYOLOEWrapper, fd_c_ppyoloe_wrapper);
+  auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
+      DetectionResultWrapper, fd_c_detection_result_wrapper);
+  return ppyoloe_model->Predict(im, detection_result.get());
+}
+}
\ No newline at end of file
diff --git a/c_api/fastdeploy_capi/vision/detection/ppdet/model.h b/c_api/fastdeploy_capi/vision/detection/ppdet/model.h
new file mode 100644
index 000000000..6dce7a64e
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/detection/ppdet/model.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy_capi/fd_common.h"
+#include "fastdeploy_capi/fd_type.h"
+#include "fastdeploy_capi/runtime_option.h"
+#include "fastdeploy_capi/vision/result.h"
+
+typedef struct FD_C_PPYOLOEWrapper FD_C_PPYOLOEWrapper;
+typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new FD_C_PPYOLOEWrapper object
+ *
+ * \param[in] model_file Path of the model file, e.g. ppyoloe/model.pdmodel
+ * \param[in] params_file Path of the parameter file, e.g. ppyoloe/model.pdiparams; if the model format is ONNX, this parameter will be ignored
+ * \param[in] config_file Path of the configuration file for deployment, e.g. ppyoloe/infer_cfg.yml
+ * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference; the default will use CPU and choose the backend defined in `valid_cpu_backends`
+ * \param[in] model_format Model format of the loaded model; default is Paddle format
+ *
+ * \return Return a pointer to FD_C_PPYOLOEWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PPYOLOEWrapper*
+FD_C_CreatesPPYOLOEWrapper(
+    const char* model_file, const char* params_file, const char* config_file,
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    const FD_C_ModelFormat model_format);
+
+/** \brief Destroy a FD_C_PPYOLOEWrapper object
+ *
+ * \param[in] fd_c_ppyoloe_wrapper pointer to FD_C_PPYOLOEWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_DestroyPPYOLOEWrapper(__fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper);
+
+/** \brief Predict the detection result for an input image
+ *
+ * \param[in] fd_c_ppyoloe_wrapper pointer to FD_C_PPYOLOEWrapper object
+ * \param[in] img pointer to cv::Mat image
+ * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result.
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPYOLOEWrapperPredict(
+    __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/c_api/fastdeploy_capi/vision/result.cc b/c_api/fastdeploy_capi/vision/result.cc
new file mode 100644
index 000000000..abf52ba69
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/result.cc
@@ -0,0 +1,238 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy_capi/vision/result.h"
+
+#include "fastdeploy/utils/utils.h"
+#include "fastdeploy_capi/types_internal.h"
+
+extern "C" {
+
+// Classification Results
+
+FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapper() {
+  FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
+      new FD_C_ClassifyResultWrapper();
+  fd_c_classify_result_wrapper->classify_result =
+      std::unique_ptr<fastdeploy::vision::ClassifyResult>(
+          new fastdeploy::vision::ClassifyResult());
+  return fd_c_classify_result_wrapper;
+}
+
+void FD_C_DestroyClassifyResultWrapper(
+    __fd_take FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
+  delete fd_c_classify_result_wrapper;
+}
+
+void FD_C_DestroyClassifyResult(
+    __fd_take FD_C_ClassifyResult* fd_c_classify_result) {
+  if (fd_c_classify_result == nullptr) return;
+  // delete label_ids
+  delete[] fd_c_classify_result->label_ids.data;
+  // delete scores
+  delete[] fd_c_classify_result->scores.data;
+  delete fd_c_classify_result;
+}
+
+FD_C_ClassifyResult* FD_C_ClassifyResultWrapperGetData(
+    __fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
+  auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
+      ClassifyResultWrapper, fd_c_classify_result_wrapper);
+  FD_C_ClassifyResult* fd_c_classify_result_data = new FD_C_ClassifyResult();
+  // copy label_ids
+  fd_c_classify_result_data->label_ids.size =
+      classify_result->label_ids.size();
+  fd_c_classify_result_data->label_ids.data =
+      new int32_t[fd_c_classify_result_data->label_ids.size];
+  memcpy(fd_c_classify_result_data->label_ids.data,
+         classify_result->label_ids.data(),
+         sizeof(int32_t) * fd_c_classify_result_data->label_ids.size);
+  // copy scores
+  fd_c_classify_result_data->scores.size = classify_result->scores.size();
+  fd_c_classify_result_data->scores.data =
+      new float[fd_c_classify_result_data->scores.size];
+  memcpy(fd_c_classify_result_data->scores.data,
+         classify_result->scores.data(),
+         sizeof(float) * fd_c_classify_result_data->scores.size);
+  fd_c_classify_result_data->type =
+      static_cast<FD_C_ResultType>(classify_result->type);
+  return fd_c_classify_result_data;
+}
+
+FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData(
+    __fd_keep FD_C_ClassifyResult* fd_c_classify_result) {
+  FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
+      FD_C_CreateClassifyResultWrapper();
+  auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
+      ClassifyResultWrapper, fd_c_classify_result_wrapper);
+  // copy label_ids
+  classify_result->label_ids.resize(fd_c_classify_result->label_ids.size);
+  memcpy(classify_result->label_ids.data(),
+         fd_c_classify_result->label_ids.data,
+         sizeof(int32_t) * fd_c_classify_result->label_ids.size);
+  // copy scores (element type is float, so size with sizeof(float))
+  classify_result->scores.resize(fd_c_classify_result->scores.size);
+  memcpy(classify_result->scores.data(), fd_c_classify_result->scores.data,
+         sizeof(float) * fd_c_classify_result->scores.size);
+  classify_result->type =
+      static_cast<fastdeploy::vision::ResultType>(fd_c_classify_result->type);
+  return fd_c_classify_result_wrapper;
+}
+
+// Detection Results
+
+FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper() {
+  FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
+      new FD_C_DetectionResultWrapper();
+  fd_c_detection_result_wrapper->detection_result =
+      std::unique_ptr<fastdeploy::vision::DetectionResult>(
+          new fastdeploy::vision::DetectionResult());
+  return fd_c_detection_result_wrapper;
+}
+
+void FD_C_DestroyDetectionResultWrapper(
+    __fd_take FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
+  delete fd_c_detection_result_wrapper;
+}
+
+void FD_C_DestroyDetectionResult(
+    __fd_take FD_C_DetectionResult* fd_c_detection_result) {
+  if (fd_c_detection_result == nullptr) return;
+  // delete boxes
+  for (size_t i = 0; i < fd_c_detection_result->boxes.size; i++) {
+    delete[] fd_c_detection_result->boxes.data[i].data;
+  }
+  delete[] fd_c_detection_result->boxes.data;
+  // delete scores
+  delete[] fd_c_detection_result->scores.data;
+  // delete label_ids
+  delete[] fd_c_detection_result->label_ids.data;
+  // delete masks
+  for (size_t i = 0; i < fd_c_detection_result->masks.size; i++) {
+    delete[] fd_c_detection_result->masks.data[i].data.data;
+    delete[] fd_c_detection_result->masks.data[i].shape.data;
+  }
+  // also release the masks array itself
+  delete[] fd_c_detection_result->masks.data;
+  delete fd_c_detection_result;
+}
+
+FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
+    __fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
+  auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
+      DetectionResultWrapper, fd_c_detection_result_wrapper);
+  FD_C_DetectionResult* fd_c_detection_result = new FD_C_DetectionResult();
+  // copy boxes
+  const int boxes_coordinate_dim = 4;
+  fd_c_detection_result->boxes.size = detection_result->boxes.size();
+  fd_c_detection_result->boxes.data =
+      new FD_C_OneDimArrayFloat[fd_c_detection_result->boxes.size];
+  for (size_t i = 0; i < detection_result->boxes.size(); i++) {
+    fd_c_detection_result->boxes.data[i].size = boxes_coordinate_dim;
+    fd_c_detection_result->boxes.data[i].data =
+        new float[boxes_coordinate_dim];
+    for (size_t j = 0; j < boxes_coordinate_dim; j++) {
+      fd_c_detection_result->boxes.data[i].data[j] =
+          detection_result->boxes[i][j];
+    }
+  }
+  // copy scores
+  fd_c_detection_result->scores.size = detection_result->scores.size();
+  fd_c_detection_result->scores.data =
+      new float[fd_c_detection_result->scores.size];
+  memcpy(fd_c_detection_result->scores.data, detection_result->scores.data(),
+         sizeof(float) * fd_c_detection_result->scores.size);
+  // copy label_ids
+  fd_c_detection_result->label_ids.size = detection_result->label_ids.size();
+  fd_c_detection_result->label_ids.data =
+      new int32_t[fd_c_detection_result->label_ids.size];
+  memcpy(fd_c_detection_result->label_ids.data,
+         detection_result->label_ids.data(),
+         sizeof(int32_t) * fd_c_detection_result->label_ids.size);
+  // copy masks
+  fd_c_detection_result->masks.size = detection_result->masks.size();
+  fd_c_detection_result->masks.data =
+      new FD_C_Mask[fd_c_detection_result->masks.size];
+  for (size_t i = 0; i < detection_result->masks.size(); i++) {
+    // copy data in mask
+    fd_c_detection_result->masks.data[i].data.size =
+        detection_result->masks[i].data.size();
+    fd_c_detection_result->masks.data[i].data.data =
+        new uint8_t[detection_result->masks[i].data.size()];
+    memcpy(fd_c_detection_result->masks.data[i].data.data,
+           detection_result->masks[i].data.data(),
+           sizeof(uint8_t) * detection_result->masks[i].data.size());
+    // copy shape in mask
+    fd_c_detection_result->masks.data[i].shape.size =
+        detection_result->masks[i].shape.size();
+    fd_c_detection_result->masks.data[i].shape.data =
+        new int64_t[detection_result->masks[i].shape.size()];
+    memcpy(fd_c_detection_result->masks.data[i].shape.data,
+           detection_result->masks[i].shape.data(),
+           sizeof(int64_t) * detection_result->masks[i].shape.size());
+    fd_c_detection_result->masks.data[i].type =
+        static_cast<FD_C_ResultType>(detection_result->masks[i].type);
+  }
+  fd_c_detection_result->contain_masks = detection_result->contain_masks;
+  fd_c_detection_result->type =
+      static_cast<FD_C_ResultType>(detection_result->type);
+  return fd_c_detection_result;
+}
+
+FD_C_DetectionResultWrapper*
+FD_C_CreateDetectionResultWrapperFromData(
+    __fd_keep FD_C_DetectionResult* fd_c_detection_result) {
+  FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
+      FD_C_CreateDetectionResultWrapper();
+  auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
+      DetectionResultWrapper, fd_c_detection_result_wrapper);
+
+  // copy boxes
+  const int boxes_coordinate_dim = 4;
+  detection_result->boxes.resize(fd_c_detection_result->boxes.size);
+  for (size_t i = 0; i < fd_c_detection_result->boxes.size; i++) {
+    for (size_t j = 0; j < boxes_coordinate_dim; j++) {
+      detection_result->boxes[i][j] =
+          fd_c_detection_result->boxes.data[i].data[j];
+    }
+  }
+  // copy scores
+  detection_result->scores.resize(fd_c_detection_result->scores.size);
+  memcpy(detection_result->scores.data(), fd_c_detection_result->scores.data,
+         sizeof(float) * fd_c_detection_result->scores.size);
+  // copy label_ids
+  detection_result->label_ids.resize(fd_c_detection_result->label_ids.size);
+  memcpy(detection_result->label_ids.data(),
+         fd_c_detection_result->label_ids.data,
+         sizeof(int32_t) * fd_c_detection_result->label_ids.size);
+  // copy masks
+  detection_result->masks.resize(fd_c_detection_result->masks.size);
+  for (size_t i = 0; i < fd_c_detection_result->masks.size; i++) {
+    // copy data in mask
+    detection_result->masks[i].data.resize(
+        fd_c_detection_result->masks.data[i].data.size);
+    memcpy(detection_result->masks[i].data.data(),
+           fd_c_detection_result->masks.data[i].data.data,
+           sizeof(uint8_t) * fd_c_detection_result->masks.data[i].data.size);
+    // copy shape in mask
+    detection_result->masks[i].shape.resize(
+        fd_c_detection_result->masks.data[i].shape.size);
+    memcpy(detection_result->masks[i].shape.data(),
+           fd_c_detection_result->masks.data[i].shape.data,
+           sizeof(int64_t) * fd_c_detection_result->masks.data[i].shape.size);
+    detection_result->masks[i].type =
+        static_cast<fastdeploy::vision::ResultType>(
+            fd_c_detection_result->masks.data[i].type);
+  }
+  detection_result->contain_masks = fd_c_detection_result->contain_masks;
+  detection_result->type =
+      static_cast<fastdeploy::vision::ResultType>(fd_c_detection_result->type);
+
+  return fd_c_detection_result_wrapper;
+}
+}
\ No newline at end of file
diff --git a/c_api/fastdeploy_capi/vision/result.h b/c_api/fastdeploy_capi/vision/result.h
new file mode 100644
index 000000000..9d32052d9
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/result.h
@@ -0,0 +1,161 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy_capi/fd_common.h"
+#include "fastdeploy_capi/fd_type.h"
+
+typedef struct FD_C_ClassifyResultWrapper FD_C_ClassifyResultWrapper;
+typedef struct FD_C_DetectionResultWrapper FD_C_DetectionResultWrapper;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FD_ENUM(FD_C_ResultType){
+    UNKNOWN_RESULT,
+    CLASSIFY,
+    DETECTION,
+    SEGMENTATION,
+    OCR,
+    MOT,
+    FACE_DETECTION,
+    FACE_ALIGNMENT,
+    FACE_RECOGNITION,
+    MATTING,
+    MASK,
+    KEYPOINT_DETECTION,
+    HEADPOSE,
+};
+
+typedef struct FD_C_ClassifyResult {
+  FD_C_OneDimArrayInt32 label_ids;
+  FD_C_OneDimArrayFloat scores;
+  FD_C_ResultType type;
+} FD_C_ClassifyResult;
+
+typedef struct FD_C_Mask {
+  FD_C_OneDimArrayUint8 data;
+  FD_C_OneDimArrayInt64 shape;
+  FD_C_ResultType type;
+} FD_C_Mask;
+
+typedef struct FD_C_OneDimMask {
+  size_t size;
+  FD_C_Mask* data;
+} FD_C_OneDimMask;  // std::vector<FD_C_Mask>
+
+typedef struct FD_C_DetectionResult {
+  FD_C_TwoDimArrayFloat boxes;
+  FD_C_OneDimArrayFloat scores;
+  FD_C_OneDimArrayInt32 label_ids;
+  FD_C_OneDimMask masks;
+  FD_C_Bool contain_masks;
+  FD_C_ResultType type;
+} FD_C_DetectionResult;
+
+// Classification Results
+
+/** \brief Create a new FD_C_ClassifyResultWrapper object
+ *
+ * \return Return a pointer to FD_C_ClassifyResultWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResultWrapper*
+FD_C_CreateClassifyResultWrapper();
+
+/** \brief Destroy a FD_C_ClassifyResultWrapper object
+ *
+ * \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyClassifyResultWrapper(
+    __fd_take FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
+
+/** \brief Destroy a FD_C_ClassifyResult object
+ *
+ * \param[in] fd_c_classify_result pointer to FD_C_ClassifyResult object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_DestroyClassifyResult(__fd_take FD_C_ClassifyResult* fd_c_classify_result);
+
+/** \brief Get a FD_C_ClassifyResult object from FD_C_ClassifyResultWrapper object
+ *
+ * \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object
+ * \return Return a pointer to FD_C_ClassifyResult object
+ */
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResult*
+FD_C_ClassifyResultWrapperGetData(
+    __fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
+
+/** \brief Create a new FD_C_ClassifyResultWrapper object from FD_C_ClassifyResult object
+ *
+ * \param[in] fd_c_classify_result pointer to FD_C_ClassifyResult object
+ * \return Return a pointer to FD_C_ClassifyResultWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResultWrapper*
+FD_C_CreateClassifyResultWrapperFromData(
+    __fd_keep FD_C_ClassifyResult* fd_c_classify_result);
+
+// Detection Results
+
+/** \brief Create a new FD_C_DetectionResultWrapper object
+ *
+ * \return Return a pointer to FD_C_DetectionResultWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResultWrapper*
+FD_C_CreateDetectionResultWrapper();
+
+/** \brief Destroy a FD_C_DetectionResultWrapper object
+ *
+ * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyDetectionResultWrapper(
+    __fd_take FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
+
+/** \brief Destroy a FD_C_DetectionResult object
+ *
+ * \param[in] fd_c_detection_result pointer to FD_C_DetectionResult object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern void
+FD_C_DestroyDetectionResult(
+    __fd_take FD_C_DetectionResult* fd_c_detection_result);
+
+/** \brief Get a FD_C_DetectionResult object from FD_C_DetectionResultWrapper object
+ *
+ * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object
+ * \return Return a pointer to FD_C_DetectionResult object
+ */
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResult*
+FD_C_DetectionResultWrapperGetData(
+    __fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
+
+/** \brief Create a new FD_C_DetectionResultWrapper object from FD_C_DetectionResult object
+ *
+ * \param[in] fd_c_detection_result pointer to FD_C_DetectionResult object
+ * \return Return a pointer to FD_C_DetectionResultWrapper object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResultWrapper*
+FD_C_CreateDetectionResultWrapperFromData(
+    __fd_keep FD_C_DetectionResult* fd_c_detection_result);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
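Since FD_C_DetectionResult is a plain-C mirror of vision::DetectionResult, consumers can walk it with ordinary array indexing. A short sketch, assuming the result came from FD_C_DetectionResultWrapperGetData() and that each box is laid out as [xmin, ymin, xmax, ymax] like the C++ DetectionResult it copies:

    #include <cstdio>
    #include "fastdeploy_capi/vision/result.h"

    void PrintDetections(FD_C_DetectionResult* result, float score_threshold) {
      for (size_t i = 0; i < result->boxes.size; ++i) {
        if (result->scores.data[i] < score_threshold) continue;
        const float* box = result->boxes.data[i].data;  // [xmin, ymin, xmax, ymax] assumed
        std::printf("label %d  score %.3f  box (%.1f, %.1f, %.1f, %.1f)\n",
                    result->label_ids.data[i], result->scores.data[i], box[0],
                    box[1], box[2], box[3]);
      }
      FD_C_DestroyDetectionResult(result);  // the result owns its arrays; free when done
    }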
diff --git a/c_api/fastdeploy_capi/vision/visualize.cc b/c_api/fastdeploy_capi/vision/visualize.cc
new file mode 100644
index 000000000..9132fe606
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/visualize.cc
@@ -0,0 +1,35 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy_capi/vision/visualize.h"
+
+#include "fastdeploy/vision/visualize/visualize.h"
+#include "fastdeploy_capi/types_internal.h"
+
+extern "C" {
+
+FD_C_Mat FD_C_VisDetection(FD_C_Mat im,
+                           FD_C_DetectionResult* fd_c_detection_result,
+                           float score_threshold, int line_size,
+                           float font_size) {
+  FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
+      FD_C_CreateDetectionResultWrapperFromData(fd_c_detection_result);
+  auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
+      DetectionResultWrapper, fd_c_detection_result_wrapper);
+  cv::Mat result = fastdeploy::vision::Visualize::VisDetection(
+      *(reinterpret_cast<cv::Mat*>(im)), *detection_result, score_threshold,
+      line_size, font_size);
+  // release the temporary wrapper created above to avoid leaking its copy
+  FD_C_DestroyDetectionResultWrapper(fd_c_detection_result_wrapper);
+  return new cv::Mat(result);
+}
+}
\ No newline at end of file
diff --git a/c_api/fastdeploy_capi/vision/visualize.h b/c_api/fastdeploy_capi/vision/visualize.h
new file mode 100644
index 000000000..43d406dab
--- /dev/null
+++ b/c_api/fastdeploy_capi/vision/visualize.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy_capi/fd_common.h"
+#include "fastdeploy_capi/fd_type.h"
+#include "fastdeploy_capi/vision/result.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Visualize detection results on an image
+ *
+ * \param[in] im pointer to cv::Mat image
+ * \param[in] fd_detection_result pointer to FD_C_DetectionResult object
+ * \param[in] score_threshold score threshold below which boxes are not drawn
+ * \param[in] line_size line thickness of the boxes
+ * \param[in] font_size font size of the labels
+ *
+ * \return Return a pointer to cv::Mat object
+ */
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat
+FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result,
+                  float score_threshold, int line_size, float font_size);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
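A sketch of how FD_C_VisDetection is meant to be called — assuming FD_C_Mat wraps a heap-allocated cv::Mat, since visualize.cc above returns `new cv::Mat(result)`; the thresholds and sizes below are illustrative only:

    #include <opencv2/opencv.hpp>
    #include "fastdeploy_capi/vision/visualize.h"

    void SaveVisualization(FD_C_Mat im, FD_C_DetectionResult* result) {
      FD_C_Mat vis = FD_C_VisDetection(im, result, /*score_threshold=*/0.5f,
                                       /*line_size=*/2, /*font_size=*/0.5f);
      cv::imwrite("vis_result.jpg", *reinterpret_cast<cv::Mat*>(vis));
      delete reinterpret_cast<cv::Mat*>(vis);  // visualize.cc allocates with new
    }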
diff --git a/cmake/summary.cmake b/cmake/summary.cmake
index ee2efccb7..1482539c1 100755
--- a/cmake/summary.cmake
+++ b/cmake/summary.cmake
@@ -44,6 +44,7 @@ function(fastdeploy_summary)
   message(STATUS "  WITH_ASCEND : ${WITH_ASCEND}")
   message(STATUS "  WITH_TIMVX : ${WITH_TIMVX}")
   message(STATUS "  WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}")
+  message(STATUS "  WITH_CAPI : ${WITH_CAPI}")
   if(ENABLE_ORT_BACKEND)
     message(STATUS "  ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
   endif()

From ab5377b5fab12f48cdaacfbfa60382c16588fbbd Mon Sep 17 00:00:00 2001
From: DefTruth <31974251+DefTruth@users.noreply.github.com>
Date: Thu, 9 Feb 2023 11:58:07 +0800
Subject: [PATCH 04/14] [Benchmark] Add PaddleYOLOv8 cpp benchmark example &
 lite flags option (#1270)

* [Android] Add PaddleYOLOv8 cpp benchmark example & lite flags option

* [Benchmark] add linux x86_64 gpu benchmark build script
---
 benchmark/cpp/CMakeLists.txt                  |   3 +
 benchmark/cpp/benchmark_ppyolov8.cc           | 125 ++++++++++++++++++
 benchmark/cpp/benchmark_yolov5.cc             |   6 +-
 benchmark/cpp/flags.h                         |  13 +-
 .../runtime/backends/lite/lite_backend.h      |   2 +-
 .../build_android_cpp_with_benchmark.sh       | 118 +++++++++++++++++
 scripts/linux/build_linux_x86_64_cpp_gpu.sh   |   2 +-
 ...ild_linux_x86_64_cpp_gpu_with_benchmark.sh |  83 ++++++++++++
 8 files changed, 345 insertions(+), 7 deletions(-)
 create mode 100644 benchmark/cpp/benchmark_ppyolov8.cc
 mode change 100755 => 100644 benchmark/cpp/benchmark_yolov5.cc
 create mode 100755 scripts/android/build_android_cpp_with_benchmark.sh
 create mode 100755 scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh

diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt
index 9706587d3..c79e679c3 100755
--- a/benchmark/cpp/CMakeLists.txt
+++ b/benchmark/cpp/CMakeLists.txt
@@ -9,9 +9,12 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 include_directories(${FASTDEPLOY_INCS})
 
 add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
+add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
 
 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
 endif()
diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc
new file mode 100644
index 000000000..4bd6e0df4
--- /dev/null
+++ b/benchmark/cpp/benchmark_ppyolov8.cc
@@ -0,0 +1,125 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/benchmark/utils.h"
+#include "fastdeploy/vision.h"
+#include "flags.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+bool RunModel(std::string model_dir, std::string image_file, size_t warmup,
+              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
+              std::string gpu_mem_file_name) {
+  // Initialization
+  auto option = fastdeploy::RuntimeOption();
+  if (!CreateRuntimeOption(&option)) {
+    PrintUsage();
+    return false;
+  }
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+
+  if (FLAGS_profile_mode == "runtime") {
+    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
+  }
+  auto model = fastdeploy::vision::detection::PaddleYOLOv8(
+      model_file, params_file, config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return false;
+  }
+  auto im = cv::imread(image_file);
+  // For Runtime
+  if (FLAGS_profile_mode == "runtime") {
+    fastdeploy::vision::DetectionResult res;
+    if (!model.Predict(im, &res)) {
+      std::cerr << "Failed to predict." << std::endl;
+      return false;
+    }
+    double profile_time = model.GetProfileTime() * 1000;
+    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
+    auto vis_im = fastdeploy::vision::VisDetection(im, res);
+    cv::imwrite("vis_result.jpg", vis_im);
+    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+  } else {
+    // For End2End
+    // Step1: warm up for warmup times
+    std::cout << "Warmup " << warmup << " times..." << std::endl;
+    for (size_t i = 0; i < warmup; i++) {
+      fastdeploy::vision::DetectionResult res;
+      if (!model.Predict(im, &res)) {
+        std::cerr << "Failed to predict." << std::endl;
+        return false;
+      }
+    }
+    std::vector<float> end2end_statis;
+    // Step2: repeat for repeats times
+    std::cout << "Counting time..." << std::endl;
+    fastdeploy::TimeCounter tc;
+    fastdeploy::vision::DetectionResult res;
+    for (size_t i = 0; i < repeats; i++) {
+      if (FLAGS_collect_memory_info && i % dump_period == 0) {
+        fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
+#if defined(WITH_GPU)
+        fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
+                                                         FLAGS_device_id);
+#endif
+      }
+      tc.Start();
+      if (!model.Predict(im, &res)) {
+        std::cerr << "Failed to predict." << std::endl;
+        return false;
+      }
+      tc.End();
+      end2end_statis.push_back(tc.Duration() * 1000);
+    }
+    float end2end = std::accumulate(end2end_statis.end() - repeats,
+                                    end2end_statis.end(), 0.f) /
+                    repeats;
+    std::cout << "End2End(ms): " << end2end << "ms."
+              << std::endl;
+    auto vis_im = fastdeploy::vision::VisDetection(im, res);
+    cv::imwrite("vis_result.jpg", vis_im);
+    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+  }
+
+  return true;
+}
+
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  int repeats = FLAGS_repeat;
+  int warmup = FLAGS_warmup;
+  int dump_period = FLAGS_dump_period;
+  std::string cpu_mem_file_name = "result_cpu.txt";
+  std::string gpu_mem_file_name = "result_gpu.txt";
+  // Run model
+  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
+               cpu_mem_file_name, gpu_mem_file_name) != true) {
+    exit(1);
+  }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+#if defined(WITH_GPU)
+    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+#endif
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc
old mode 100755
new mode 100644
index d84292536..ae16dd8d8
--- a/benchmark/cpp/benchmark_yolov5.cc
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -65,8 +65,10 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     for (int i = 0; i < repeats; i++) {
       if (FLAGS_collect_memory_info && i % dump_period == 0) {
         fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
+#if defined(WITH_GPU)
         fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
                                                          FLAGS_device_id);
+#endif
       }
       tc.Start();
       if (!model.Predict(im, &res)) {
@@ -102,9 +104,11 @@ int main(int argc, char* argv[]) {
   }
   if (FLAGS_collect_memory_info) {
     float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
     std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+#if defined(WITH_GPU)
+    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
     std::cout << "gpu_pss_mb: " << gpu_mem << "MB."
               << std::endl;
+#endif
 }
\ No newline at end of file
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index 3d35eb313..c9a8e8d91 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -27,13 +27,14 @@ DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
 DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
 DEFINE_string(backend, "default",
               "The inference runtime backend, support: ['default', 'ort', "
-              "'paddle', 'ov', 'trt', 'paddle_trt']");
+              "'paddle', 'ov', 'trt', 'paddle_trt', 'lite']");
 DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
 DEFINE_bool(
     include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
 DEFINE_bool(
     use_fp16, false,
-    "Whether to use FP16 mode, only support 'trt' and 'paddle_trt' backend");
+    "Whether to use FP16 mode, only support 'trt', 'paddle_trt' "
+    "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
 DEFINE_int32(dump_period, 100, "How often to collect memory info.");
@@ -58,7 +59,6 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
       option->UsePaddleInferBackend();
     } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
       option->UseTrtBackend();
-      option->SetTrtInputShape("input", {1, 3, 112, 112});
       if (FLAGS_backend == "paddle_trt") {
         option->EnablePaddleToTrt();
       }
@@ -81,11 +81,16 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
       option->UseOpenVINOBackend();
     } else if (FLAGS_backend == "paddle") {
       option->UsePaddleInferBackend();
+    } else if (FLAGS_backend == "lite") {
+      option->UsePaddleLiteBackend();
+      if (FLAGS_use_fp16) {
+        option->EnableLiteFP16();
+      }
     } else if (FLAGS_backend == "default") {
       return true;
     } else {
       std::cout << "While inference with CPU, only support "
-                   "default/ort/ov/paddle now, "
+                   "default/ort/ov/paddle/lite now, "
                << FLAGS_backend << " is not supported."
<< std::endl;
 return false;
 }
 diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h
index bd738545a..15e71b50a 100644
--- a/fastdeploy/runtime/backends/lite/lite_backend.h
+++ b/fastdeploy/runtime/backends/lite/lite_backend.h
@@ -32,7 +32,7 @@ class LiteBackend : public BaseBackend {
   LiteBackend() {}
   virtual ~LiteBackend() = default;

-  bool Init(const RuntimeOption& option);
+  bool Init(const RuntimeOption& option) override;

   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
diff --git a/scripts/android/build_android_cpp_with_benchmark.sh b/scripts/android/build_android_cpp_with_benchmark.sh
new file mode 100755
index 000000000..4a2c4084c
--- /dev/null
+++ b/scripts/android/build_android_cpp_with_benchmark.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+set -e
+set +x
+
+# -------------------------------------------------------------------------------
+# mutable global variables
+# -------------------------------------------------------------------------------
+TOOLCHAIN=clang # gcc/clang toolchain
+
+# -------------------------------------------------------------------------------
+# readonly global variables
+# -------------------------------------------------------------------------------
+readonly ROOT_PATH=$(pwd)
+readonly ANDROID_ABI=$1
+readonly ANDROID_PLATFORM="android-$2"
+readonly BUILD_ROOT=build/Android
+readonly BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-$2
+
+# -------------------------------------------------------------------------------
+# tasks
+# -------------------------------------------------------------------------------
+__make_build_dir() {
+  if [ ! -d "${BUILD_DIR}" ]; then
+    echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} does not exist, creating it ..."
+    if [ ! -d "${BUILD_ROOT}" ]; then
+      mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !"
+    fi
+    mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !"
+  else
+    echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}"
+  fi
+}
+
+__check_cxx_envs() {
+  if [ $LDFLAGS ]; then
+    echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c"
+    echo "unset it before cross compiling ${ANDROID_ABI}"
+    unset LDFLAGS
+  fi
+  if [ $CPPFLAGS ]; then
+    echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c"
+    echo "unset it before cross compiling ${ANDROID_ABI}"
+    unset CPPFLAGS
+  fi
+  if [ $CPLUS_INCLUDE_PATH ]; then
+    echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c"
+    echo "unset it before cross compiling ${ANDROID_ABI}"
+    unset CPLUS_INCLUDE_PATH
+  fi
+  if [ $C_INCLUDE_PATH ]; then
+    echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c"
+    echo "unset it before cross compiling ${ANDROID_ABI}"
+    unset C_INCLUDE_PATH
+  fi
+}
+
+__set_android_ndk() {
+  if [ -z $ANDROID_NDK ]; then
+    echo "-- [ERROR] ANDROID_NDK is not set, please set it up manually ..."
+    exit 1
+  else
+    echo "-- [INFO] Found ANDROID_NDK: ${ANDROID_NDK}"
+  fi
+  if [ "$ANDROID_NDK" ]; then
+    NDK_VERSION=$(echo $ANDROID_NDK | egrep -o "[0-9]{2}" | head -n 1)
+    if [ "$NDK_VERSION" -gt 17 ]; then
+      TOOLCHAIN=clang
+    fi
+    echo "-- [INFO] Checked ndk version: ${NDK_VERSION}"
+    echo "-- [INFO] Selected toolchain: ${TOOLCHAIN}"
+  fi
+}
+
+__build_fastdeploy_android_shared() {
+
+  local ANDROID_STL=c++_shared  # c++_static
+  local ANDROID_TOOLCHAIN=${TOOLCHAIN}
+  local TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake
+  local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install"
+  cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}"
+
+  cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
+        -DCMAKE_BUILD_TYPE=MinSizeRel \
+        -DANDROID_ABI=${ANDROID_ABI} \
+        -DANDROID_NDK=${ANDROID_NDK} \
+        -DANDROID_PLATFORM=${ANDROID_PLATFORM} \
+        -DANDROID_STL=${ANDROID_STL} \
+        -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
+        -DENABLE_ORT_BACKEND=OFF \
+        -DENABLE_LITE_BACKEND=ON \
+        -DENABLE_PADDLE2ONNX=OFF \
+        -DENABLE_FLYCV=ON \
+        -DENABLE_TEXT=OFF \
+        -DENABLE_VISION=ON \
+        -DBUILD_EXAMPLES=ON \
+        -DENABLE_BENCHMARK=ON \
+        -DWITH_OPENCV_STATIC=OFF \
+        -DWITH_LITE_STATIC=OFF \
+        -DWITH_OPENMP=OFF \
+        -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \
+        -Wno-dev ../../.. && make -j8 && make install
+
+  echo "-- [INFO][built][${ANDROID_ABI}][${BUILD_DIR}/install]"
+}
+
+main() {
+  __make_build_dir
+  __check_cxx_envs
+  __set_android_ndk
+  __build_fastdeploy_android_shared
+  exit 0
+}
+
+main
+
+# Usage:
+# ./scripts/android/build_android_cpp_with_benchmark.sh arm64-v8a 21
+# ./scripts/android/build_android_cpp_with_benchmark.sh armeabi-v7a 21
diff --git a/scripts/linux/build_linux_x86_64_cpp_gpu.sh b/scripts/linux/build_linux_x86_64_cpp_gpu.sh
index 6f2b4ed7d..9ae91921e 100755
--- a/scripts/linux/build_linux_x86_64_cpp_gpu.sh
+++ b/scripts/linux/build_linux_x86_64_cpp_gpu.sh
@@ -62,7 +62,7 @@ __build_fastdeploy_linux_x86_64_gpu_shared() {
         -DENABLE_OPENVINO_BACKEND=ON \
         -DENABLE_PADDLE2ONNX=ON \
         -DENABLE_VISION=ON \
-        -DENABLE_BENCHMARK=ON \
+        -DENABLE_BENCHMARK=OFF \
         -DBUILD_EXAMPLES=ON \
         -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \
         -Wno-dev ../../.. && make -j8 && make install
diff --git a/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh b/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh
new file mode 100755
index 000000000..6f2b4ed7d
--- /dev/null
+++ b/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+set -e
+set +x
+
+# -------------------------------------------------------------------------------
+# readonly global variables
+# -------------------------------------------------------------------------------
+readonly ROOT_PATH=$(pwd)
+readonly BUILD_ROOT=build/Linux
+readonly BUILD_DIR="${BUILD_ROOT}/x86_64_gpu"
+
+# -------------------------------------------------------------------------------
+# tasks
+# -------------------------------------------------------------------------------
+__make_build_dir() {
+  if [ ! -d "${BUILD_DIR}" ]; then
+    echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} does not exist, creating it ..."
+    if [ ! -d "${BUILD_ROOT}" ]; then
+      mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !"
+    fi
+    mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !"
+  else
+    echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}"
+  fi
+}
+
+__check_cxx_envs() {
+  if [ $LDFLAGS ]; then
+    echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset LDFLAGS
+  fi
+  if [ $CPPFLAGS ]; then
+    echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset CPPFLAGS
+  fi
+  if [ $CPLUS_INCLUDE_PATH ]; then
+    echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset CPLUS_INCLUDE_PATH
+  fi
+  if [ $C_INCLUDE_PATH ]; then
+    echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset C_INCLUDE_PATH
+  fi
+}
+
+__build_fastdeploy_linux_x86_64_gpu_shared() {
+
+  local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install"
+  cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}"
+
+  cmake -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_GPU=ON \
+        -DTRT_DIRECTORY=${TRT_DIRECTORY} \
+        -DCUDA_DIRECTORY=${CUDA_DIRECTORY} \
+        -DENABLE_ORT_BACKEND=ON \
+        -DENABLE_TRT_BACKEND=ON \
+        -DENABLE_PADDLE_BACKEND=ON \
+        -DENABLE_OPENVINO_BACKEND=ON \
+        -DENABLE_PADDLE2ONNX=ON \
+        -DENABLE_VISION=ON \
+        -DENABLE_BENCHMARK=ON \
+        -DBUILD_EXAMPLES=ON \
+        -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \
+        -Wno-dev ../../.. && make -j8 && make install
+
+  echo "-- [INFO][built][x86_64_gpu][${BUILD_DIR}/install]"
+}
+
+main() {
+  __make_build_dir
+  __check_cxx_envs
+  __build_fastdeploy_linux_x86_64_gpu_shared
+  exit 0
+}
+
+main
+
+# Usage:
+# ./scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh
From ea46586953e4bd0c96103dd0e62ad3036146d6f4 Mon Sep 17 00:00:00 2001
From: leiqing <54695910+leiqing1@users.noreply.github.com>
Date: Thu, 9 Feb 2023 19:55:06 +0800
Subject: [PATCH 05/14] Update README_CN.md

---
 docs/README_CN.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/README_CN.md b/docs/README_CN.md
index da0c7b8a9..227db0407 100755
--- a/docs/README_CN.md
+++ b/docs/README_CN.md
@@ -10,7 +10,7 @@
 - [IPU部署环境编译安装](cn/build_and_install/ipu.md)
 - [昆仑芯XPU部署环境编译安装](cn/build_and_install/kunlunxin.md)
 - [瑞芯微RV1126部署环境编译安装](cn/build_and_install/rv1126.md)
-- [瑞芯微RK3588部署环境编译安装](cn/build_and_install/rknpu2.md)
+- [瑞芯微RK3588、RK356X部署环境编译安装](cn/build_and_install/rknpu2.md)
 - [晶晨A311D部署环境编译安装](cn/build_and_install/a311d.md)
 - [华为昇腾部署环境编译安装](cn/build_and_install/huawei_ascend.md)
 - [Jetson部署环境编译安装](cn/build_and_install/jetson.md)
From 4742f97819ccb4f6084a877fb653788344732a82 Mon Sep 17 00:00:00 2001
From: yeliang2258 <30516196+yeliang2258@users.noreply.github.com>
Date: Thu, 9 Feb 2023 20:39:44 +0800
Subject: [PATCH 06/14] [Bug Fix] Fix KunlunXin valid_places (#1285)

fix KunlunXin valid_places
---
 fastdeploy/runtime/backends/lite/configure_hardware.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/runtime/backends/lite/configure_hardware.cc b/fastdeploy/runtime/backends/lite/configure_hardware.cc
index 7ac60383f..cf8a958fe 100644
--- a/fastdeploy/runtime/backends/lite/configure_hardware.cc
+++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc
@@ -51,8 +51,9 @@ void LiteBackend::ConfigureCpu(const LiteBackendOption& option) {
 void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
   std::vector<paddle::lite_api::Place> valid_places;
-  valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
+  // TODO(yeliang): Placing kInt8 first may cause accuracy issues for some models
+  // valid_places.push_back(
  //     paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
   if (option.enable_fp16) {
     valid_places.push_back(
         paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
From c2e5f6317eae57f30db2627f93ae68b628338785 Mon Sep 17 00:00:00 2001
From: wwbitejotunn
Date: Thu, 9 Feb 2023 05:51:30 +0000
Subject: [PATCH 07/14] fix paddle backend

---
 fastdeploy/runtime/backends/paddle/paddle_backend.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index e0e908c36..19493f90b 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -44,6 +44,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
                   "file will be saved to the directory where the paddle model is saved."
                << std::endl;
       use_static = true;
+      config_.SetOptimCacheDir(option.trt_option.serialize_file);
     }
     config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
                                  option.trt_option.max_batch_size, 3,
From 898b0632161e8384ba7178598dfb2225549d586b Mon Sep 17 00:00:00 2001
From: wwbitejotunn
Date: Thu, 9 Feb 2023 12:55:05 +0000
Subject: [PATCH 08/14] get cache dir

---
 fastdeploy/runtime/backends/paddle/paddle_backend.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index 19493f90b..4df109991 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -44,7 +44,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
                   "file will be saved to the directory where the paddle model is saved."
                << std::endl;
       use_static = true;
-      config_.SetOptimCacheDir(option.trt_option.serialize_file);
+      std::string opt_cache_dir =
+          GetDirFromPath(option.trt_option.serialize_file);
+
+      config_.SetOptimCacheDir(opt_cache_dir);
     }
     config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
                                  option.trt_option.max_batch_size, 3,
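[Note] Patches 07/08 only change where the TensorRT optimization cache lands; the serialize path itself comes from user code. Below is a minimal caller-side sketch, assuming the RuntimeOption API used in benchmark/cpp/flags.h; the cache path is a placeholder, not taken from the patch.

// Hedged sketch (not part of the patch): where trt_option.serialize_file
// originates. With PATCH 08, PaddleBackend derives the parent directory of
// this path via GetDirFromPath() and hands it to config_.SetOptimCacheDir().
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseGpu(0);
  option.UseTrtBackend();
  option.EnablePaddleToTrt();  // same call the benchmark uses for paddle_trt
  // Placeholder path: the TRT engine serializes here, and the optimization
  // cache now lands in "./trt_cache" rather than in the raw file path.
  option.trt_option.serialize_file = "./trt_cache/model.trt";
  return 0;
}

Before PATCH 08 the file path itself was passed to SetOptimCacheDir, so Paddle Inference would presumably have treated the .trt file as a cache directory; switching to the parent directory avoids that.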
id."); DEFINE_int32(warmup, 200, "Number of warmup for profiling."); DEFINE_int32(repeat, 1000, "Number of repeats for profiling."); DEFINE_string(profile_mode, "runtime", "runtime or end2end."); @@ -41,8 +41,8 @@ DEFINE_int32(dump_period, 100, "How often to collect memory info."); void PrintUsage() { std::cout << "Usage: infer_demo --model model_path --image img_path --device " - "[cpu|gpu] --backend " - "[default|ort|paddle|ov|trt|paddle_trt] " + "[cpu|gpu|xpu] --backend " + "[default|ort|paddle|ov|trt|paddle_trt|lite] " "--use_fp16 false" << std::endl; std::cout << "Default value of device: cpu" << std::endl; @@ -52,7 +52,7 @@ void PrintUsage() { bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { if (FLAGS_device == "gpu") { - option->UseGpu(); + option->UseGpu(FLAGS_device_id); if (FLAGS_backend == "ort") { option->UseOrtBackend(); } else if (FLAGS_backend == "paddle") { @@ -94,8 +94,27 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { << FLAGS_backend << " is not supported." << std::endl; return false; } + } else if (FLAGS_device == "xpu") { + option->UseKunlunXin(FLAGS_device_id); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with XPU, only support " + "default/ort/paddle/lite now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } } else { - std::cerr << "Only support device CPU/GPU now, " << FLAGS_device + std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device << " is not supported." << std::endl; return false; } diff --git a/scripts/linux/build_linux_x86_64_cpp_xpu_with_benchmark.sh b/scripts/linux/build_linux_x86_64_cpp_xpu_with_benchmark.sh new file mode 100755 index 000000000..e098883ea --- /dev/null +++ b/scripts/linux/build_linux_x86_64_cpp_xpu_with_benchmark.sh @@ -0,0 +1,79 @@ +#!/bin/bash +set -e +set +x + +# ------------------------------------------------------------------------------- +# readonly global variables +# ------------------------------------------------------------------------------- +readonly ROOT_PATH=$(pwd) +readonly BUILD_ROOT=build/Linux +readonly BUILD_DIR="${BUILD_ROOT}/x86_64_xpu" + +# ------------------------------------------------------------------------------- +# tasks +# ------------------------------------------------------------------------------- +__make_build_dir() { + if [ ! -d "${BUILD_DIR}" ]; then + echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} not exists, setup manually ..." + if [ ! -d "${BUILD_ROOT}" ]; then + mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !" + fi + mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !" 
+  else
+    echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}"
+  fi
+}
+
+__check_cxx_envs() {
+  if [ $LDFLAGS ]; then
+    echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset LDFLAGS
+  fi
+  if [ $CPPFLAGS ]; then
+    echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset CPPFLAGS
+  fi
+  if [ $CPLUS_INCLUDE_PATH ]; then
+    echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset CPLUS_INCLUDE_PATH
+  fi
+  if [ $C_INCLUDE_PATH ]; then
+    echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c"
+    echo "unset it before cross compiling ${BUILD_DIR}"
+    unset C_INCLUDE_PATH
+  fi
+}
+
+__build_fastdeploy_linux_x86_64_xpu_shared() {
+
+  local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install"
+  cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}"
+
+  cmake -DWITH_KUNLUNXIN=ON \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_GPU=OFF \
+        -DENABLE_ORT_BACKEND=ON \
+        -DENABLE_PADDLE_BACKEND=ON \
+        -DENABLE_VISION=ON \
+        -DENABLE_BENCHMARK=ON \
+        -DBUILD_EXAMPLES=OFF \
+        -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \
+        -Wno-dev ../../.. && make -j8 && make install
+
+  echo "-- [INFO][built][x86_64_xpu][${BUILD_DIR}/install]"
+}
+
+main() {
+  __make_build_dir
+  __check_cxx_envs
+  __build_fastdeploy_linux_x86_64_xpu_shared
+  exit 0
+}
+
+main
+
+# Usage:
+# ./scripts/linux/build_linux_x86_64_cpp_xpu_with_benchmark.sh
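[Note] For readers wiring up XPU outside the benchmark driver: the new branch in CreateRuntimeOption() above reduces to three RuntimeOption calls. A minimal sketch, using only calls that appear in the diff:

// Equivalent of: --device xpu --device_id 0 --backend lite --use_fp16 true
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseKunlunXin(0);         // select KunlunXin XPU, device id 0
  option.UsePaddleLiteBackend();  // the 'lite' backend added in this patch
  option.EnableLiteFP16();        // FP16 is only honored by the Lite backend
  return 0;
}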
From b732e4c711fdbad4979eabc203423309fabe8ba1 Mon Sep 17 00:00:00 2001
From: DefTruth <31974251+DefTruth@users.noreply.github.com>
Date: Fri, 10 Feb 2023 12:00:43 +0800
Subject: [PATCH 10/14] [Bug Fix] Add bundle script for text api only pkg
 (#1295)

[Android] Add bundle script for text api only pkg
---
 .../bundle_android_cpp_with_text_api_only.sh  | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100755 scripts/android/bundle_android_cpp_with_text_api_only.sh

diff --git a/scripts/android/bundle_android_cpp_with_text_api_only.sh b/scripts/android/bundle_android_cpp_with_text_api_only.sh
new file mode 100755
index 000000000..a7c26f417
--- /dev/null
+++ b/scripts/android/bundle_android_cpp_with_text_api_only.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set -e
+set +x
+
+FASTDEPLOY_DIR=$(pwd)
+BUILT_PACKAGE_DIR=build/Android
+CXX_PACKAGE_PREFIX=fastdeploy-android-latest-shared-dev
+CXX_PACKAGE_NAME=${BUILT_PACKAGE_DIR}/${CXX_PACKAGE_PREFIX}
+ARMV8_CXX_PACKAGE_NAME=${BUILT_PACKAGE_DIR}/arm64-v8a-api-21/install
+ARMV7_CXX_PACKAGE_NAME=${BUILT_PACKAGE_DIR}/armeabi-v7a-api-21/install
+
+# check package name
+echo "[INFO] --- FASTDEPLOY_DIR: ${FASTDEPLOY_DIR}"
+
+# check arm v7 & v8 c++ sdk
+if [ ! -d "${BUILT_PACKAGE_DIR}" ]; then
+  echo "[ERROR] --- ${BUILT_PACKAGE_DIR} does not exist, please build c++ sdk first!"
+  exit 1
+fi
+if [ ! -d "${ARMV8_CXX_PACKAGE_NAME}" ]; then
+  echo "[ERROR] --- ${ARMV8_CXX_PACKAGE_NAME} does not exist, please build c++ sdk first!"
+  exit 1
+fi
+if [ ! -d "${ARMV7_CXX_PACKAGE_NAME}" ]; then
+  echo "[ERROR] --- ${ARMV7_CXX_PACKAGE_NAME} does not exist, please build c++ sdk first!"
+  exit 1
+fi
+
+# remove old package
+echo "[INFO] --- Packing ${CXX_PACKAGE_NAME} package ..."
+if [ -d "${CXX_PACKAGE_NAME}" ]; then
+  rm -rf ${CXX_PACKAGE_NAME}
+  echo "[INFO] --- Removed old ${CXX_PACKAGE_NAME} done !"
+  if [ -f "${CXX_PACKAGE_NAME}.tgz" ]; then
+    rm ${CXX_PACKAGE_NAME}.tgz
+    echo "[INFO] --- Removed old ${CXX_PACKAGE_NAME}.tgz done !"
+  fi
+fi
+
+# package latest c++ sdk
+mkdir ${CXX_PACKAGE_NAME}
+echo "[INFO] --- Collecting package contents ..."
+cp -r ${ARMV7_CXX_PACKAGE_NAME}/* ${CXX_PACKAGE_NAME}/
+cp -r ${ARMV8_CXX_PACKAGE_NAME}/* ${CXX_PACKAGE_NAME}/
+if [ -d "${CXX_PACKAGE_NAME}/examples" ]; then
+  rm -rf ${CXX_PACKAGE_NAME}/examples
+fi
+echo "[INFO] --- Removed example files ..."
+echo "[INFO] --- Removing static .a files: "
+static_files=$(find ${CXX_PACKAGE_NAME}/third_libs/install/ -name "*.a")
+if [ -n "${static_files}" ]; then
+  echo "${static_files}"
+  rm $(find ${CXX_PACKAGE_NAME}/third_libs/install/ -name "*.a")
+fi
+echo "[INFO] --- Tarring ${CXX_PACKAGE_NAME}.tgz package ..."
+tar -zcvf ${CXX_PACKAGE_NAME}.tgz ${CXX_PACKAGE_NAME}/* >> ${BUILT_PACKAGE_DIR}/pkg.log 2>&1
+echo "[INFO] --- Package ${CXX_PACKAGE_NAME}.tgz done ! Package size info: "
+du -sh ${BUILT_PACKAGE_DIR}/* | grep ${CXX_PACKAGE_PREFIX}
+
+# Usage:
+# ./scripts/android/bundle_android_cpp_with_text_api_only.sh
From 59a4ab343f2d1dfc20861117ad05e483a8695f78 Mon Sep 17 00:00:00 2001
From: WJJ1995
Date: Fri, 10 Feb 2023 17:13:22 +0800
Subject: [PATCH 11/14] [Benchmark]Add ResourceUsageMonitor to collect memory
 info (#1269)

* add GPL license
* add GPL-3.0 license
* add GPL-3.0 license
* add GPL-3.0 license
* support yolov8
* add pybind for yolov8
* add yolov8 readme
* add cpp benchmark
* add cpu and gpu mem
* public part split
* add runtime mode
* fixed bugs
* add cpu_thread_nums
* deal with comments
* deal with comments
* deal with comments
* rm useless code
* add FASTDEPLOY_DECL
* add FASTDEPLOY_DECL
* fixed for windows
* mv rss to pss
* mv rss to pss
* Update utils.cc
* use thread to collect mem
* Add ResourceUsageMonitor
* rm useless code
* fixed bug
* fixed typo
* update ResourceUsageMonitor
* fixed bug
* fixed bug
* add note for ResourceUsageMonitor
* deal with comments

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
---
 benchmark/cpp/benchmark_yolov5.cc             |  53 +++----
 benchmark/cpp/flags.h                         |   2 +-
 fastdeploy/benchmark/utils.cc                 | 145 ++++++++++++------
 fastdeploy/benchmark/utils.h                  |  67 ++++++--
 fastdeploy/runtime/runtime_option.h           |   3 +-
 .../detection/contrib/rknpu2/postprocessor.h  |   2 +-
 6 files changed, 179 insertions(+), 93 deletions(-)
 mode change 100644 => 100755 benchmark/cpp/benchmark_yolov5.cc
 mode change 100644 => 100755 fastdeploy/runtime/runtime_option.h

diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc
old mode 100755
new mode 100755
index ae16dd8d8..2e5df6b1c
--- a/benchmark/cpp/benchmark_yolov5.cc
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -17,8 +17,7 @@
 #include "flags.h"

 bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+              size_t repeats, size_t sampling_interval) {
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
@@ -34,6 +33,12 @@
     return false;
   }
   auto im = cv::imread(image_file);
+  // To collect memory info
+  fastdeploy::benchmark::ResourceUsageMonitor resource_monitor(
+      sampling_interval, FLAGS_device_id);
+  if (FLAGS_collect_memory_info) {
+    resource_monitor.Start();
+  }
   // For Runtime
   if (FLAGS_profile_mode == "runtime") {
     fastdeploy::vision::DetectionResult res;
@@ -57,35 +62,34 @@
       return false;
     }
  }
-  std::vector<float> end2end_statis;
   // Step2: repeat for repeats times
   std::cout << "Counting time..." << std::endl;
-  fastdeploy::TimeCounter tc;
+  std::cout << "Repeat " << repeats << " times..." << std::endl;
   fastdeploy::vision::DetectionResult res;
+  fastdeploy::TimeCounter tc;
+  tc.Start();
   for (int i = 0; i < repeats; i++) {
-    if (FLAGS_collect_memory_info && i % dump_period == 0) {
-      fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-      fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                       FLAGS_device_id);
-#endif
-    }
-    tc.Start();
     if (!model.Predict(im, &res)) {
       std::cerr << "Failed to predict." << std::endl;
       return false;
     }
-    tc.End();
-    end2end_statis.push_back(tc.Duration() * 1000);
   }
-  float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                  end2end_statis.end(), 0.f) /
-                  repeats;
+  tc.End();
+  double end2end = tc.Duration() / repeats * 1000;
   std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
   auto vis_im = fastdeploy::vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = resource_monitor.GetMaxCpuMem();
+    float gpu_mem = resource_monitor.GetMaxGpuMem();
+    float gpu_util = resource_monitor.GetMaxGpuUtil();
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+    std::cout << "gpu_util: " << gpu_util << std::endl;
+    resource_monitor.Stop();
+  }
   return true;
 }

@@ -94,21 +98,10 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   int repeats = FLAGS_repeat;
   int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
+  int sampling_interval = FLAGS_sampling_interval;
   // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
+  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
     exit(1);
   }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB."
              << std::endl;
-#endif
-  }
   return 0;
 } \ No newline at end of file
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index 6ecf9b33e..64f22c702 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -37,7 +37,7 @@
 DEFINE_bool(
     "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");

 void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path --device "
diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc
index 2b0bd9df1..a33db1dc2 100755
--- a/fastdeploy/benchmark/utils.cc
+++ b/fastdeploy/benchmark/utils.cc
@@ -36,86 +36,131 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }

-void DumpCurrentCpuMemoryUsage(const std::string& name) {
+// Split string
+static void split(const std::string& s, std::vector<std::string>& tokens,
+                  char delim = ' ') {
+  tokens.clear();
+  size_t lastPos = s.find_first_not_of(delim, 0);
+  size_t pos = s.find(delim, lastPos);
+  while (lastPos != std::string::npos) {
+    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
+    lastPos = s.find_first_not_of(delim, pos);
+    pos = s.find(delim, lastPos);
+  }
+  return;
+}
+
+ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
+    : is_supported_(false),
+      sampling_interval_(sampling_interval_ms),
+      gpu_id_(gpu_id) {
+#if defined(__linux__) || defined(__ANDROID__)
+  is_supported_ = true;
+#else
+  is_supported_ = false;
+#endif
+  if (!is_supported_) {
+    FDASSERT(false,
+             "Currently ResourceUsageMonitor only supports Linux and ANDROID.")
+    return;
+  }
+}
+
+void ResourceUsageMonitor::Start() {
+  if (!is_supported_) return;
+  if (check_memory_thd_ != nullptr) {
+    FDINFO << "Memory monitoring has already started!" << std::endl;
+    return;
+  }
+  FDINFO << "Start monitoring memory!" << std::endl;
+  stop_signal_ = false;
+  check_memory_thd_.reset(new std::thread(([this]() {
+    // Note we retrieve the memory usage at the very beginning of the thread.
+    while (true) {
+      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
+      // get max_cpu_mem
+      std::vector<std::string> cpu_tokens;
+      split(cpu_mem_info, cpu_tokens, ' ');
+      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#if defined(WITH_GPU)
+      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
+      // get max_gpu_mem and max_gpu_util
+      std::vector<std::string> gpu_tokens;
+      split(gpu_mem_info, gpu_tokens, ',');
+      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
+      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
+#endif
+      if (stop_signal_) break;
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(sampling_interval_));
+    }
+  })));
+}
+
+void ResourceUsageMonitor::Stop() {
+  if (!is_supported_) {
+    return;
+  }
+  if (check_memory_thd_ == nullptr) {
+    FDINFO << "Memory monitoring hasn't started yet or has stopped!"
+           << std::endl;
+    return;
+  }
+  FDINFO << "Stop monitoring memory!"
         << std::endl;
+  StopInternal();
+}
+
+void ResourceUsageMonitor::StopInternal() {
+  stop_signal_ = true;
+  if (check_memory_thd_ == nullptr) {
+    return;
+  }
+  if (check_memory_thd_ != nullptr) {
+    check_memory_thd_->join();
+  }
+  check_memory_thd_.reset(nullptr);
+}
+
+std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
+  std::string result = "";
 #if defined(__linux__) || defined(__ANDROID__)
   int iPid = static_cast<int>(getpid());
   std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];

   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect cpu memory info only supports Linux and ANDROID.")
 #endif
-  return;
+  return result;
 }

-void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
+  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
   std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
                         " --query-gpu=index,uuid,name,timestamp,memory.total,"
                         "memory.free,memory.used,utilization.gpu,utilization."
                         "memory --format=csv,noheader,nounits";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];

   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect gpu memory info only supports Linux in GPU.")
 #endif
-  return;
-}
-
-float GetCpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_cpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ' ')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
-  }
-  return max_cpu_mem / 1024;
-}
-
-float GetGpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_gpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ',')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
-  }
-  return max_gpu_mem;
+  return result;
 }

 }  // namespace benchmark
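[Note] The tokens[6]/tokens[7] indices used in Start() map onto the memory.used and utilization.gpu fields of the nvidia-smi query built in GetCurrentGpuMemoryInfo(). A standalone sketch of that parsing follows, reusing the split() helper from this diff; the CSV line is fabricated purely for illustration.

// Hedged sketch (not part of the patch): parsing one nvidia-smi CSV line.
#include <iostream>
#include <string>
#include <vector>

static void split(const std::string& s, std::vector<std::string>& tokens,
                  char delim = ' ') {
  tokens.clear();
  size_t lastPos = s.find_first_not_of(delim, 0);
  size_t pos = s.find(delim, lastPos);
  while (lastPos != std::string::npos) {
    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
    lastPos = s.find_first_not_of(delim, pos);
    pos = s.find(delim, lastPos);
  }
}

int main() {
  // Field order: index,uuid,name,timestamp,memory.total,memory.free,
  //              memory.used,utilization.gpu,utilization.memory
  std::string line =
      "0, GPU-0000, Fake GPU, 2023/02/10 12:00:00.000, 22731, 20000, 2731, 35, 4";
  std::vector<std::string> tokens;
  split(line, tokens, ',');
  // tokens[6] -> memory.used (MB), tokens[7] -> utilization.gpu (%);
  // std::stof tolerates the leading space each comma split leaves behind.
  std::cout << "gpu_mem_mb: " << std::stof(tokens[6])
            << " gpu_util: " << std::stof(tokens[7]) << std::endl;
  return 0;
}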
diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h
index 12770f365..f81cb29c1 100755
--- a/fastdeploy/benchmark/utils.h
+++ b/fastdeploy/benchmark/utils.h
@@ -13,23 +13,72 @@
 // limitations under the License.

 #pragma once
+#include <memory>
+#include <thread>  // NOLINT
 #include "fastdeploy/utils/utils.h"

 namespace fastdeploy {
 namespace benchmark {

+/*! @brief ResourceUsageMonitor object used to collect memory info.
+ */
+class FASTDEPLOY_DECL ResourceUsageMonitor {
+ public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info(ms).
+   * \param[in] gpu_id Device(gpu) id, default 0.
+   */
+  explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0);

-// Record current cpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
+  ~ResourceUsageMonitor() { StopInternal(); }

-// Record current gpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
-                                               int device_id);
+  /// Start memory info collect
+  void Start();
+  /// Stop memory info collect
+  void Stop();
+  /// Get maximum cpu memory usage
+  float GetMaxCpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_cpu_mem_;
+  }
+  /// Get maximum gpu memory usage
+  float GetMaxGpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_mem_;
+  }
+  /// Get maximum gpu util
+  float GetMaxGpuUtil() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_util_;
+  }

-// Get Max cpu memory usage
-FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
+  ResourceUsageMonitor(ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor(ResourceUsageMonitor&&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&&) = delete;

-// Get Max gpu memory usage
-FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+ private:
+  void StopInternal();
+  // Get current cpu memory info
+  std::string GetCurrentCpuMemoryInfo();
+  // Get current gpu memory info
+  std::string GetCurrentGpuMemoryInfo(int device_id);
+
+  bool is_supported_ = false;
+  bool stop_signal_ = false;
+  const int sampling_interval_;
+  float max_cpu_mem_ = 0.0f;
+  float max_gpu_mem_ = 0.0f;
+  float max_gpu_util_ = 0.0f;
+  const int gpu_id_ = 0;
+  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
+};

 }  // namespace benchmark
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
old mode 100644
new mode 100755
index 0aa6bbec8..c45dd2fe7
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -198,8 +198,7 @@ struct FASTDEPLOY_DECL RuntimeOption {

   // *** The belowing api are deprecated, will be removed in v1.2.0
   // *** Do not use it anymore
-
-  void SetPaddleMKLDNN(bool pd_mkldnn = true);
+  void SetPaddleMKLDNN(bool pd_mkldnn = true);
   void EnablePaddleToTrt();
   void DeletePaddleBackendPass(const std::string& delete_pass_name);
   void EnablePaddleLogInfo();
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
index a6b6f0cc9..de52e6b1a 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
@@ -56,7 +56,7 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   float GetNMSThreshold() const { return nms_threshold_; }

   /// Set height and weight
-  void SetHeightAndWeight(int height,int width) {
+  void SetHeightAndWeight(int height, int width) {
     height_ = height;
     width_ = width;
   }
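[Note] Putting the new utils.h API together, the intended calling pattern is the one benchmark_yolov5.cc uses above; a minimal sketch:

// Hedged sketch (not part of the patch): typical ResourceUsageMonitor usage.
#include <iostream>
#include "fastdeploy/benchmark/utils.h"

void ProfileWithMemoryInfo() {
  // Sample every 50 ms on GPU 0; only Linux/Android are supported.
  fastdeploy::benchmark::ResourceUsageMonitor monitor(50, 0);
  monitor.Start();
  // ... run warmup and the repeated Predict() calls here ...
  std::cout << "cpu_pss_mb: " << monitor.GetMaxCpuMem() << "MB." << std::endl;
  std::cout << "gpu_pss_mb: " << monitor.GetMaxGpuMem() << "MB." << std::endl;
  std::cout << "gpu_util: " << monitor.GetMaxGpuUtil() << std::endl;
  monitor.Stop();
}

The getters return -1.0f until Start() has been called, and the destructor invokes StopInternal(), so an early return does not leak the sampling thread.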
From 6a3ac91057203035fba9b722ac8be251b6a323e9 Mon Sep 17 00:00:00 2001
From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com>
Date: Sat, 11 Feb 2023 09:09:53 +0800
Subject: [PATCH 12/14] [Model] Update rkyolo pybind (#1294)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update rkyolo pybind
---
 .../detection/contrib/rknpu2/postprocessor.h  |  4 ++++
 .../detection/contrib/rknpu2/rkyolo_pybind.cc |  4 +++-
 .../detection/contrib/rkyolo/rkyolov5.py      | 21 ++++++++++---------
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
index de52e6b1a..7178b13b4 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
@@ -80,6 +80,10 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
     obj_class_num_ = num;
     prob_box_size_ = obj_class_num_ + 5;
   }
+  /// Get the number of class
+  int GetClassNum() {
+    return obj_class_num_;
+  }

  private:
  std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62,
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc b/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc
index 716464458..5fe70b7fd 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc
@@ -65,7 +65,9 @@ void BindRKYOLO(pybind11::module& m) {
       .def_property("conf_threshold", &vision::detection::RKYOLOPostprocessor::GetConfThreshold,
                     &vision::detection::RKYOLOPostprocessor::SetConfThreshold)
       .def_property("nms_threshold", &vision::detection::RKYOLOPostprocessor::GetNMSThreshold,
-                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold);
+                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold)
+      .def_property("class_num", &vision::detection::RKYOLOPostprocessor::GetClassNum,
+                    &vision::detection::RKYOLOPostprocessor::SetClassNum);

  pybind11::class_<vision::detection::RKYOLOV5, FastDeployModel>(m, "RKYOLOV5")
      .def(pybind11::init
Date: Sun, 12 Feb 2023 15:13:32 +0800
Subject: [PATCH 13/14] [Other] Update fast_tokenizer version (#1300)

Update fast_tokenizer version
---
 cmake/fast_tokenizer.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/fast_tokenizer.cmake b/cmake/fast_tokenizer.cmake
index 6e183dafe..fb32f8ea1 100644
--- a/cmake/fast_tokenizer.cmake
+++ b/cmake/fast_tokenizer.cmake
@@ -61,7 +61,7 @@ endif(WIN32)

 message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
 set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
-set(FASTTOKENIZER_VERSION "1.0.1")
+set(FASTTOKENIZER_VERSION "1.0.2")

 # Set download url
 if(WIN32)
From e63f5f369e79deea00895a3f0ee3d24020068574 Mon Sep 17 00:00:00 2001
From: Jack Zhou
Date: Sun, 12 Feb 2023 15:13:39 +0800
Subject: [PATCH 14/14] [Backend] Update paddle inference to 2.4-dev5 (#1302)

update paddle inference to 2.4-dev5
---
 cmake/paddle_inference.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake
index efd65394e..47d3d9fcf 100755
--- a/cmake/paddle_inference.cmake
+++ b/cmake/paddle_inference.cmake
@@ -80,7 +80,7 @@ if(PADDLEINFERENCE_DIRECTORY)
   endif()
 else()
   set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-  set(PADDLEINFERENCE_VERSION "2.4-dev4")
+  set(PADDLEINFERENCE_VERSION "2.4-dev5")
   if(WIN32)
     if (WITH_GPU)
       set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.zip")