diff --git a/.gitignore b/.gitignore index 3cd7ff865..c1706dee3 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,5 @@ fastdeploy/pybind/main.cc python/fastdeploy/libs/lib* __pycache__ build_fastdeploy_android.sh -python/scripts/process_libraries.py \ No newline at end of file +python/scripts/process_libraries.py +.vs \ No newline at end of file diff --git a/fastdeploy/vision/detection/contrib/nanodet_plus.cc b/fastdeploy/vision/detection/contrib/nanodet_plus.cc index a779df500..2babae49c 100644 --- a/fastdeploy/vision/detection/contrib/nanodet_plus.cc +++ b/fastdeploy/vision/detection/contrib/nanodet_plus.cc @@ -117,8 +117,8 @@ NanoDetPlus::NanoDetPlus(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; diff --git a/fastdeploy/vision/detection/contrib/nanodet_plus.h b/fastdeploy/vision/detection/contrib/nanodet_plus.h index 9dfb9f65a..57b472a0b 100644 --- a/fastdeploy/vision/detection/contrib/nanodet_plus.h +++ b/fastdeploy/vision/detection/contrib/nanodet_plus.h @@ -26,28 +26,18 @@ namespace detection { class FASTDEPLOY_DECL NanoDetPlus : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file NanoDetPlus(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "nanodet"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 + virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.35f, float nms_iou_threshold = 0.5f); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of input size (width, height), e.g (320, 320) std::vector size; // padding value, size should be same with Channels @@ -64,27 +54,15 @@ class FASTDEPLOY_DECL NanoDetPlus : public FastDeployModel { int reg_max; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 bool IsDynamicInput() const { return is_dynamic_input_; } // whether to inference with dynamic shape (e.g ONNX export with dynamic shape diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/fastdeploy/vision/detection/contrib/scaledyolov4.cc index 5308a612f..38c765208 100644 --- a/fastdeploy/vision/detection/contrib/scaledyolov4.cc +++ b/fastdeploy/vision/detection/contrib/scaledyolov4.cc @@ -62,8 +62,8 @@ ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.h b/fastdeploy/vision/detection/contrib/scaledyolov4.h index d48a5036c..b2ce5d697 100644 --- a/fastdeploy/vision/detection/contrib/scaledyolov4.h +++ b/fastdeploy/vision/detection/contrib/scaledyolov4.h @@ -23,28 +23,17 @@ namespace detection { class FASTDEPLOY_DECL ScaledYOLOv4 : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file ScaledYOLOv4(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 virtual std::string ModelName() const { return "ScaledYOLOv4"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25, float nms_iou_threshold = 0.5); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -63,29 +52,15 @@ class FASTDEPLOY_DECL ScaledYOLOv4 : public FastDeployModel { float max_wh; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 void LetterBox(Mat* mat, const std::vector& size, const std::vector& color, bool _auto, bool scale_fill = false, bool scale_up = true, diff --git a/fastdeploy/vision/detection/contrib/yolor.cc b/fastdeploy/vision/detection/contrib/yolor.cc index ec770f32d..15885634c 100644 --- a/fastdeploy/vision/detection/contrib/yolor.cc +++ b/fastdeploy/vision/detection/contrib/yolor.cc @@ -61,8 +61,8 @@ YOLOR::YOLOR(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; @@ -192,7 +192,6 @@ bool YOLOR::Postprocess( float pad_h = (out_h - ipt_h * scale) / 2.0f; float pad_w = (out_w - ipt_w * scale) / 2.0f; if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 pad_h = static_cast(static_cast(pad_h) % stride); pad_w = static_cast(static_cast(pad_w) % stride); } diff --git a/fastdeploy/vision/detection/contrib/yolor.h b/fastdeploy/vision/detection/contrib/yolor.h index 8940969a5..f979b2dd1 100644 --- a/fastdeploy/vision/detection/contrib/yolor.h +++ b/fastdeploy/vision/detection/contrib/yolor.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,27 +23,16 @@ namespace detection { class FASTDEPLOY_DECL YOLOR : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file YOLOR(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 virtual std::string ModelName() const { return "YOLOR"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25, float nms_iou_threshold = 0.5); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -62,29 +51,15 @@ class FASTDEPLOY_DECL YOLOR : public FastDeployModel { float max_wh; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 void LetterBox(Mat* mat, const std::vector& size, const std::vector& color, bool _auto, bool scale_fill = false, bool scale_up = true, diff --git a/fastdeploy/vision/detection/contrib/yolov5.h b/fastdeploy/vision/detection/contrib/yolov5.h index 5f44acbe5..05aae90b1 100644 --- a/fastdeploy/vision/detection/contrib/yolov5.h +++ b/fastdeploy/vision/detection/contrib/yolov5.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,28 +23,16 @@ namespace detection { class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file YOLOv5(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "yolov5"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25, float nms_iou_threshold = 0.5); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 static bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info, const std::vector& size = {640, 640}, @@ -54,22 +42,12 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { bool is_scale_up = false, int stride = 32, float max_wh = 7680.0, bool multi_label = true); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - // multi_label 后处理时box选取是否采用多标签方式 static bool Postprocess( std::vector& infer_results, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold, bool multi_label, float max_wh = 7680.0); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size_; // padding value, size should be same with Channels @@ -90,10 +68,8 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { bool multi_label_; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 bool IsDynamicInput() const { return is_dynamic_input_; } static void LetterBox(Mat* mat, std::vector size, diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.cc b/fastdeploy/vision/detection/contrib/yolov5lite.cc index fbd2978a8..33c93bb8c 100644 --- a/fastdeploy/vision/detection/contrib/yolov5lite.cc +++ b/fastdeploy/vision/detection/contrib/yolov5lite.cc @@ -86,8 +86,8 @@ YOLOv5Lite::YOLOv5Lite(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -244,7 +244,6 @@ bool YOLOv5Lite::PostprocessWithDecode( float pad_h = (out_h - ipt_h * scale) / 2.0f; float pad_w = (out_w - ipt_w * scale) / 2.0f; if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 pad_h = static_cast(static_cast(pad_h) % stride); pad_w = static_cast(static_cast(pad_w) % stride); } @@ -314,7 +313,6 @@ bool YOLOv5Lite::Postprocess( float pad_h = (out_h - ipt_h * scale) / 2.0f; float pad_w = (out_w - ipt_w * scale) / 2.0f; if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 pad_h = static_cast(static_cast(pad_h) % stride); pad_w = static_cast(static_cast(pad_w) % stride); } diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.h b/fastdeploy/vision/detection/contrib/yolov5lite.h index fb717c9c8..0b8a88086 100644 --- a/fastdeploy/vision/detection/contrib/yolov5lite.h +++ b/fastdeploy/vision/detection/contrib/yolov5lite.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,26 +23,16 @@ namespace detection { class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file YOLOv5Lite(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 virtual std::string ModelName() const { return "YOLOv5-Lite"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 + virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.45, float nms_iou_threshold = 0.25); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -84,27 +74,16 @@ class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel { float anchor_h; }; - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 + bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // YOLOv5Lite的官方脚本默认导出不带decode模块的模型文件 需要在后处理进行decode // the official YOLOv5Lite/export.py will export ONNX file without decode // module. // this fuction support the postporocess for ONNX file without decode module. @@ -114,13 +93,11 @@ class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel { const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 void LetterBox(Mat* mat, const std::vector& size, const std::vector& color, bool _auto, bool scale_fill = false, bool scale_up = true, int stride = 32); + // generate anchors for decodeing when ONNX file without decode module. void GenerateAnchors(const std::vector& size, const std::vector& downsample_strides, diff --git a/fastdeploy/vision/detection/contrib/yolov6.h b/fastdeploy/vision/detection/contrib/yolov6.h index 51419e205..f951e65a1 100644 --- a/fastdeploy/vision/detection/contrib/yolov6.h +++ b/fastdeploy/vision/detection/contrib/yolov6.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,27 +26,16 @@ namespace detection { class FASTDEPLOY_DECL YOLOv6 : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file YOLOv6(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "YOLOv6"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25, float nms_iou_threshold = 0.5); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -66,27 +55,15 @@ class FASTDEPLOY_DECL YOLOv6 : public FastDeployModel { float max_wh; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* outputs, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 bool IsDynamicInput() const { return is_dynamic_input_; } void LetterBox(Mat* mat, std::vector size, std::vector color, diff --git a/fastdeploy/vision/detection/contrib/yolov7.cc b/fastdeploy/vision/detection/contrib/yolov7.cc index 1684f2fc4..e776a8c6c 100644 --- a/fastdeploy/vision/detection/contrib/yolov7.cc +++ b/fastdeploy/vision/detection/contrib/yolov7.cc @@ -191,7 +191,6 @@ bool YOLOv7::Postprocess( float pad_h = (out_h - ipt_h * scale) / 2.0f; float pad_w = (out_w - ipt_w * scale) / 2.0f; if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 pad_h = static_cast(static_cast(pad_h) % stride); pad_w = static_cast(static_cast(pad_w) % stride); } diff --git a/fastdeploy/vision/detection/contrib/yolov7.h b/fastdeploy/vision/detection/contrib/yolov7.h index ffbab559b..009050aba 100644 --- a/fastdeploy/vision/detection/contrib/yolov7.h +++ b/fastdeploy/vision/detection/contrib/yolov7.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,21 +27,12 @@ class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel { const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 virtual std::string ModelName() const { return "yolov7"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25, float nms_iou_threshold = 0.5); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -60,29 +51,15 @@ class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel { float max_wh; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 void LetterBox(Mat* mat, const std::vector& size, const std::vector& color, bool _auto, bool scale_fill = false, bool scale_up = true, diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h index 16f7e0bec..f8cfef828 100644 --- a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,20 +28,11 @@ class FASTDEPLOY_DECL YOLOv7End2EndORT : public FastDeployModel { const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 virtual std::string ModelName() const { return "yolov7end2end_ort"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -58,28 +49,15 @@ class FASTDEPLOY_DECL YOLOv7End2EndORT : public FastDeployModel { int stride; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold); - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 void LetterBox(Mat* mat, const std::vector& size, const std::vector& color, bool _auto, bool scale_fill = false, bool scale_up = true, diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h index 61d11dceb..8b97b8090 100644 --- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.h @@ -28,20 +28,11 @@ class FASTDEPLOY_DECL YOLOv7End2EndTRT : public FastDeployModel { const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 virtual std::string ModelName() const { return "yolov7end2end_trt"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -58,29 +49,16 @@ class FASTDEPLOY_DECL YOLOv7End2EndTRT : public FastDeployModel { int stride; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 bool Postprocess(std::vector& infer_results, DetectionResult* result, const std::map>& im_info, float conf_threshold); - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 void LetterBox(Mat* mat, const std::vector& size, const std::vector& color, bool _auto, bool scale_fill = false, bool scale_up = true, diff --git a/fastdeploy/vision/detection/contrib/yolox.h b/fastdeploy/vision/detection/contrib/yolox.h index d9e88bf30..2c6c39608 100644 --- a/fastdeploy/vision/detection/contrib/yolox.h +++ b/fastdeploy/vision/detection/contrib/yolox.h @@ -26,27 +26,16 @@ namespace detection { class FASTDEPLOY_DECL YOLOX : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file YOLOX(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "YOLOX"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, DetectionResult* result, float conf_threshold = 0.25, float nms_iou_threshold = 0.5); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -63,33 +52,20 @@ class FASTDEPLOY_DECL YOLOX : public FastDeployModel { float max_wh; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* outputs, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // YOLOX的官方脚本默认导出不带decode模块的模型文件 需要在后处理进行decode bool PostprocessWithDecode( FDTensor& infer_result, DetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 bool IsDynamicInput() const { return is_dynamic_input_; } // whether to inference with dynamic shape (e.g ONNX export with dynamic shape diff --git a/fastdeploy/vision/facedet/contrib/retinaface.cc b/fastdeploy/vision/facedet/contrib/retinaface.cc index 5e0fd6645..6f38f5636 100644 --- a/fastdeploy/vision/facedet/contrib/retinaface.cc +++ b/fastdeploy/vision/facedet/contrib/retinaface.cc @@ -81,8 +81,8 @@ RetinaFace::RetinaFace(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; diff --git a/fastdeploy/vision/facedet/contrib/retinaface.h b/fastdeploy/vision/facedet/contrib/retinaface.h index d14396c6b..bd0cdefcf 100644 --- a/fastdeploy/vision/facedet/contrib/retinaface.h +++ b/fastdeploy/vision/facedet/contrib/retinaface.h @@ -25,27 +25,16 @@ namespace facedet { class FASTDEPLOY_DECL RetinaFace : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file RetinaFace(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "Pytorch_Retinaface"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, float conf_threshold = 0.25f, float nms_iou_threshold = 0.4f); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height), default (640, 640) std::vector size; // variance in RetinaFace's prior-box(anchor) generate process, @@ -60,28 +49,16 @@ class FASTDEPLOY_DECL RetinaFace : public FastDeployModel { int landmarks_per_face; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(std::vector& infer_result, FaceDetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 bool IsDynamicInput() const { return is_dynamic_input_; } bool is_dynamic_input_; diff --git a/fastdeploy/vision/facedet/contrib/scrfd.cc b/fastdeploy/vision/facedet/contrib/scrfd.cc index 4602c3825..d87d66068 100644 --- a/fastdeploy/vision/facedet/contrib/scrfd.cc +++ b/fastdeploy/vision/facedet/contrib/scrfd.cc @@ -63,8 +63,8 @@ SCRFD::SCRFD(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -218,7 +218,6 @@ bool SCRFD::Postprocess( float pad_h = (out_h - ipt_h * scale) / 2.0f; float pad_w = (out_w - ipt_w * scale) / 2.0f; if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 pad_h = static_cast(static_cast(pad_h) % stride); pad_w = static_cast(static_cast(pad_w) % stride); } diff --git a/fastdeploy/vision/facedet/contrib/scrfd.h b/fastdeploy/vision/facedet/contrib/scrfd.h index b409cbc4f..0323e982a 100644 --- a/fastdeploy/vision/facedet/contrib/scrfd.h +++ b/fastdeploy/vision/facedet/contrib/scrfd.h @@ -26,27 +26,16 @@ namespace facedet { class FASTDEPLOY_DECL SCRFD : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file SCRFD(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "scrfd"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, float conf_threshold = 0.25f, float nms_iou_threshold = 0.4f); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height), default (640, 640) std::vector size; // downsample strides (namely, steps) for SCRFD to @@ -75,22 +64,11 @@ class FASTDEPLOY_DECL SCRFD : public FastDeployModel { unsigned int num_anchors; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(std::vector& infer_result, FaceDetectionResult* result, const std::map>& im_info, @@ -98,9 +76,6 @@ class FASTDEPLOY_DECL SCRFD : public FastDeployModel { void GeneratePoints(); - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 void LetterBox(Mat* mat, const std::vector& size, const std::vector& color, bool _auto, bool scale_fill = false, bool scale_up = true, diff --git a/fastdeploy/vision/facedet/contrib/ultraface.cc b/fastdeploy/vision/facedet/contrib/ultraface.cc index 5aba23b9e..e7dd99dc4 100644 --- a/fastdeploy/vision/facedet/contrib/ultraface.cc +++ b/fastdeploy/vision/facedet/contrib/ultraface.cc @@ -27,8 +27,8 @@ UltraFace::UltraFace(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; diff --git a/fastdeploy/vision/facedet/contrib/ultraface.h b/fastdeploy/vision/facedet/contrib/ultraface.h index c26d9dd7a..8627f49aa 100644 --- a/fastdeploy/vision/facedet/contrib/ultraface.h +++ b/fastdeploy/vision/facedet/contrib/ultraface.h @@ -25,55 +25,32 @@ namespace facedet { class FASTDEPLOY_DECL UltraFace : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file UltraFace(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, float conf_threshold = 0.7f, float nms_iou_threshold = 0.3f); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height), default (320, 240) std::vector size; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* outputs, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(std::vector& infer_result, FaceDetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 bool IsDynamicInput() const { return is_dynamic_input_; } bool is_dynamic_input_; diff --git a/fastdeploy/vision/facedet/contrib/yolov5face.cc b/fastdeploy/vision/facedet/contrib/yolov5face.cc index cc8d3ce83..d4753e306 100644 --- a/fastdeploy/vision/facedet/contrib/yolov5face.cc +++ b/fastdeploy/vision/facedet/contrib/yolov5face.cc @@ -64,8 +64,8 @@ YOLOv5Face::YOLOv5Face(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; diff --git a/fastdeploy/vision/facedet/contrib/yolov5face.h b/fastdeploy/vision/facedet/contrib/yolov5face.h index bc12876c0..42357b3ea 100644 --- a/fastdeploy/vision/facedet/contrib/yolov5face.h +++ b/fastdeploy/vision/facedet/contrib/yolov5face.h @@ -25,27 +25,16 @@ namespace facedet { class FASTDEPLOY_DECL YOLOv5Face : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file YOLOv5Face(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "yolov5-face"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, float conf_threshold = 0.25, float nms_iou_threshold = 0.5); - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 // tuple of (width, height) std::vector size; // padding value, size should be same with Channels @@ -66,27 +55,15 @@ class FASTDEPLOY_DECL YOLOv5Face : public FastDeployModel { int landmarks_per_face; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* outputs, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 bool Postprocess(FDTensor& infer_result, FaceDetectionResult* result, const std::map>& im_info, float conf_threshold, float nms_iou_threshold); - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 bool IsDynamicInput() const { return is_dynamic_input_; } bool is_dynamic_input_; diff --git a/fastdeploy/vision/faceid/contrib/arcface.cc b/fastdeploy/vision/faceid/contrib/arcface.cc index 1583c7883..5f6cb834d 100644 --- a/fastdeploy/vision/faceid/contrib/arcface.cc +++ b/fastdeploy/vision/faceid/contrib/arcface.cc @@ -31,13 +31,8 @@ ArcFace::ArcFace(const std::string& model_file, const std::string& params_file, } bool ArcFace::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend + + // (1) if parent class initialed backend if (initialized) { // (1.1) re-init parameters for specific sub-classes size = {112, 112}; @@ -47,7 +42,7 @@ bool ArcFace::Initialize() { l2_normalize = false; return true; } - // (2) 如果父类没有初始化backend + // (2) if parent class not initialed backend if (!InsightFaceRecognitionModel::Initialize()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; @@ -62,19 +57,15 @@ bool ArcFace::Initialize() { } bool ArcFace::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Preprocess(mat, output); } bool ArcFace::Postprocess(std::vector& infer_result, FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Postprocess(infer_result, result); } bool ArcFace::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess return InsightFaceRecognitionModel::Predict(im, result); } diff --git a/fastdeploy/vision/faceid/contrib/arcface.h b/fastdeploy/vision/faceid/contrib/arcface.h index 247beb5d5..c5bd0f064 100644 --- a/fastdeploy/vision/faceid/contrib/arcface.h +++ b/fastdeploy/vision/faceid/contrib/arcface.h @@ -26,36 +26,21 @@ namespace faceid { class FASTDEPLOY_DECL ArcFace : public InsightFaceRecognitionModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // ArcFace支持IResNet, IResNet2060, VIT, MobileFaceNet骨干 ArcFace(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const override { return "deepinsight/insightface/recognition/arcface_pytorch"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize() override; - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 bool Preprocess(Mat* mat, FDTensor* output) override; - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 bool Postprocess(std::vector& infer_result, FaceRecognitionResult* result) override; }; diff --git a/fastdeploy/vision/faceid/contrib/cosface.cc b/fastdeploy/vision/faceid/contrib/cosface.cc index c0b5144f6..5dffcc02a 100644 --- a/fastdeploy/vision/faceid/contrib/cosface.cc +++ b/fastdeploy/vision/faceid/contrib/cosface.cc @@ -31,13 +31,7 @@ CosFace::CosFace(const std::string& model_file, const std::string& params_file, } bool CosFace::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend + if (initialized) { // (1.1) re-init parameters for specific sub-classes size = {112, 112}; @@ -47,7 +41,6 @@ bool CosFace::Initialize() { l2_normalize = false; return true; } - // (2) 如果父类没有初始化backend if (!InsightFaceRecognitionModel::Initialize()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; @@ -62,19 +55,15 @@ bool CosFace::Initialize() { } bool CosFace::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Preprocess(mat, output); } bool CosFace::Postprocess(std::vector& infer_result, FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Postprocess(infer_result, result); } bool CosFace::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess return InsightFaceRecognitionModel::Predict(im, result); } diff --git a/fastdeploy/vision/faceid/contrib/cosface.h b/fastdeploy/vision/faceid/contrib/cosface.h index 493326750..d7fc27356 100644 --- a/fastdeploy/vision/faceid/contrib/cosface.h +++ b/fastdeploy/vision/faceid/contrib/cosface.h @@ -26,37 +26,21 @@ namespace faceid { class FASTDEPLOY_DECL CosFace : public InsightFaceRecognitionModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // ArcFace支持IResNet, IResNet2060, VIT, MobileFaceNet骨干 CosFace(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 - // insightface/arcface提供的模型文件包含了cosface std::string ModelName() const override { return "deepinsight/insightface/recognition/arcface_pytorch"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize() override; - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 bool Preprocess(Mat* mat, FDTensor* output) override; - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 bool Postprocess(std::vector& infer_result, FaceRecognitionResult* result) override; }; diff --git a/fastdeploy/vision/faceid/contrib/insightface_rec.cc b/fastdeploy/vision/faceid/contrib/insightface_rec.cc index 006568255..f24a5191d 100644 --- a/fastdeploy/vision/faceid/contrib/insightface_rec.cc +++ b/fastdeploy/vision/faceid/contrib/insightface_rec.cc @@ -26,8 +26,8 @@ InsightFaceRecognitionModel::InsightFaceRecognitionModel( const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; diff --git a/fastdeploy/vision/faceid/contrib/insightface_rec.h b/fastdeploy/vision/faceid/contrib/insightface_rec.h index 9ed3fb39f..2e4b970f9 100644 --- a/fastdeploy/vision/faceid/contrib/insightface_rec.h +++ b/fastdeploy/vision/faceid/contrib/insightface_rec.h @@ -25,21 +25,15 @@ namespace faceid { class FASTDEPLOY_DECL InsightFaceRecognitionModel : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // 支持insightface/recognition人脸识别模型的基类 InsightFaceRecognitionModel( const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 virtual std::string ModelName() const { return "deepinsight/insightface"; } - // 以下为一些可供用户修改的属性 // tuple of (width, height), default (112, 112) std::vector size; - // 归一化的 alpha 和 beta,x'=x*alpha+beta std::vector alpha; std::vector beta; // whether to swap the B and R channel, such as BGR->RGB, default true. @@ -47,22 +41,12 @@ class FASTDEPLOY_DECL InsightFaceRecognitionModel : public FastDeployModel { // whether to apply l2 normalize to embedding values, default; bool l2_normalize; - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 virtual bool Predict(cv::Mat* im, FaceRecognitionResult* result); - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 virtual bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 virtual bool Preprocess(Mat* mat, FDTensor* output); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 virtual bool Postprocess(std::vector& infer_result, FaceRecognitionResult* result); }; diff --git a/fastdeploy/vision/faceid/contrib/partial_fc.cc b/fastdeploy/vision/faceid/contrib/partial_fc.cc index 2ad3d3a52..1ef1f218b 100644 --- a/fastdeploy/vision/faceid/contrib/partial_fc.cc +++ b/fastdeploy/vision/faceid/contrib/partial_fc.cc @@ -32,13 +32,7 @@ PartialFC::PartialFC(const std::string& model_file, } bool PartialFC::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend + if (initialized) { // (1.1) re-init parameters for specific sub-classes size = {112, 112}; @@ -48,7 +42,6 @@ bool PartialFC::Initialize() { l2_normalize = false; return true; } - // (2) 如果父类没有初始化backend if (!InsightFaceRecognitionModel::Initialize()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; @@ -63,19 +56,15 @@ bool PartialFC::Initialize() { } bool PartialFC::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Preprocess(mat, output); } bool PartialFC::Postprocess(std::vector& infer_result, FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Postprocess(infer_result, result); } bool PartialFC::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess return InsightFaceRecognitionModel::Predict(im, result); } diff --git a/fastdeploy/vision/faceid/contrib/partial_fc.h b/fastdeploy/vision/faceid/contrib/partial_fc.h index cbbda4364..275b33cfa 100644 --- a/fastdeploy/vision/faceid/contrib/partial_fc.h +++ b/fastdeploy/vision/faceid/contrib/partial_fc.h @@ -26,35 +26,21 @@ namespace faceid { class FASTDEPLOY_DECL PartialFC : public InsightFaceRecognitionModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file PartialFC(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const override { return "deepinsight/insightface/recognition/partial_fc"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize() override; - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 bool Preprocess(Mat* mat, FDTensor* output) override; - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 bool Postprocess(std::vector& infer_result, FaceRecognitionResult* result) override; }; diff --git a/fastdeploy/vision/faceid/contrib/vpl.cc b/fastdeploy/vision/faceid/contrib/vpl.cc index b423c3348..841c889a9 100644 --- a/fastdeploy/vision/faceid/contrib/vpl.cc +++ b/fastdeploy/vision/faceid/contrib/vpl.cc @@ -30,13 +30,7 @@ VPL::VPL(const std::string& model_file, const std::string& params_file, } bool VPL::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend + if (initialized) { // (1.1) re-init parameters for specific sub-classes size = {112, 112}; @@ -46,7 +40,6 @@ bool VPL::Initialize() { l2_normalize = false; return true; } - // (2) 如果父类没有初始化backend if (!InsightFaceRecognitionModel::Initialize()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; @@ -61,19 +54,15 @@ bool VPL::Initialize() { } bool VPL::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Preprocess(mat, output); } bool VPL::Postprocess(std::vector& infer_result, FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 return InsightFaceRecognitionModel::Postprocess(infer_result, result); } bool VPL::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess return InsightFaceRecognitionModel::Predict(im, result); } diff --git a/fastdeploy/vision/faceid/contrib/vpl.h b/fastdeploy/vision/faceid/contrib/vpl.h index dd5476ba8..a6c73d782 100644 --- a/fastdeploy/vision/faceid/contrib/vpl.h +++ b/fastdeploy/vision/faceid/contrib/vpl.h @@ -26,36 +26,21 @@ namespace faceid { class FASTDEPLOY_DECL VPL : public InsightFaceRecognitionModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // VPL支持IResNet, IResNet1024骨干 VPL(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const override { return "deepinsight/insightface/recognition/vpl"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize() override; - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 bool Preprocess(Mat* mat, FDTensor* output) override; - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 bool Postprocess(std::vector& infer_result, FaceRecognitionResult* result) override; }; diff --git a/fastdeploy/vision/matting/contrib/modnet.cc b/fastdeploy/vision/matting/contrib/modnet.cc index 1d30e0b6a..a52d9b1e8 100644 --- a/fastdeploy/vision/matting/contrib/modnet.cc +++ b/fastdeploy/vision/matting/contrib/modnet.cc @@ -26,8 +26,8 @@ MODNet::MODNet(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -93,7 +93,6 @@ bool MODNet::Postprocess( return false; } - // 先获取alpha并resize (使用opencv) auto iter_ipt = im_info.find("input_shape"); auto iter_out = im_info.find("output_shape"); FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), @@ -103,7 +102,6 @@ bool MODNet::Postprocess( int ipt_h = iter_ipt->second[0]; int ipt_w = iter_ipt->second[1]; - // TODO: 需要修改成FDTensor或Mat的运算 现在依赖cv::Mat float* alpha_ptr = static_cast(alpha_tensor.Data()); cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. @@ -116,7 +114,6 @@ bool MODNet::Postprocess( result->Clear(); // note: must be setup shape before Resize result->contain_foreground = false; - // 和输入原图大小对应的alpha result->shape = {static_cast(ipt_h), static_cast(ipt_w)}; int numel = ipt_h * ipt_w; int nbytes = numel * sizeof(float); diff --git a/fastdeploy/vision/matting/contrib/modnet.h b/fastdeploy/vision/matting/contrib/modnet.h index 228af1e89..a50277205 100644 --- a/fastdeploy/vision/matting/contrib/modnet.h +++ b/fastdeploy/vision/matting/contrib/modnet.h @@ -25,42 +25,27 @@ namespace matting { class FASTDEPLOY_DECL MODNet : public FastDeployModel { public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file MODNet(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::ONNX); - // 定义模型的名称 std::string ModelName() const { return "matting/MODNet"; } - // 以下为一些可供用户修改的属性 // tuple of (width, height), default (256, 256) std::vector size; - // 归一化的 alpha 和 beta,x'=x*alpha+beta std::vector alpha; std::vector beta; // whether to swap the B and R channel, such as BGR->RGB, default true. bool swap_rb; - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 bool Predict(cv::Mat* im, MattingResult* result); private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 bool Preprocess(Mat* mat, FDTensor* output, std::map>* im_info); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 bool Postprocess(std::vector& infer_result, MattingResult* result, const std::map>& im_info); }; diff --git a/fastdeploy/vision/matting/ppmatting/ppmatting.cc b/fastdeploy/vision/matting/ppmatting/ppmatting.cc index 3f2b8d4f8..a3d0a25e4 100644 --- a/fastdeploy/vision/matting/ppmatting/ppmatting.cc +++ b/fastdeploy/vision/matting/ppmatting/ppmatting.cc @@ -168,7 +168,6 @@ bool PPMatting::Postprocess( return false; } - // 先获取alpha并resize (使用opencv) auto iter_ipt = im_info.find("input_shape"); auto iter_out = im_info.find("output_shape"); auto resize_by_long = im_info.find("resize_by_long"); @@ -179,7 +178,6 @@ bool PPMatting::Postprocess( int ipt_h = iter_ipt->second[0]; int ipt_w = iter_ipt->second[1]; - // TODO: 需要修改成FDTensor或Mat的运算 现在依赖cv::Mat float* alpha_ptr = static_cast(alpha_tensor.Data()); cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); cv::Mat cropped_alpha; @@ -202,7 +200,6 @@ bool PPMatting::Postprocess( result->Clear(); // note: must be setup shape before Resize result->contain_foreground = false; - // 和输入原图大小对应的alpha result->shape = {static_cast(ipt_h), static_cast(ipt_w)}; int numel = ipt_h * ipt_w; int nbytes = numel * sizeof(float); diff --git a/fastdeploy/vision/ocr/ppocr/classifier.cc b/fastdeploy/vision/ocr/ppocr/classifier.cc index 435f18444..13fd69a07 100644 --- a/fastdeploy/vision/ocr/ppocr/classifier.cc +++ b/fastdeploy/vision/ocr/ppocr/classifier.cc @@ -27,8 +27,8 @@ Classifier::Classifier(const std::string& model_file, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT, - Backend::OPENVINO}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -81,7 +81,6 @@ void OcrClassifierResizeImage(Mat* mat, } } -//预处理 bool Classifier::Preprocess(Mat* mat, FDTensor* output) { // 1. cls resizes // 2. normalize @@ -99,7 +98,6 @@ bool Classifier::Preprocess(Mat* mat, FDTensor* output) { return true; } -//后处理 bool Classifier::Postprocess(FDTensor& infer_result, std::tuple* cls_result) { std::vector output_shape = infer_result.shape; @@ -119,7 +117,6 @@ bool Classifier::Postprocess(FDTensor& infer_result, return true; } -//预测 bool Classifier::Predict(cv::Mat* img, std::tuple* cls_result) { Mat mat(*img); std::vector input_tensors(1); diff --git a/fastdeploy/vision/ocr/ppocr/classifier.h b/fastdeploy/vision/ocr/ppocr/classifier.h index 397775de6..110ef7f37 100644 --- a/fastdeploy/vision/ocr/ppocr/classifier.h +++ b/fastdeploy/vision/ocr/ppocr/classifier.h @@ -25,16 +25,12 @@ namespace ocr { class FASTDEPLOY_DECL Classifier : public FastDeployModel { public: Classifier(); - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file Classifier(const std::string& model_file, const std::string& params_file = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE); - // 定义模型的名称 std::string ModelName() const { return "ppocr/ocr_cls"; } - // 模型预测接口,即用户调用的接口 virtual bool Predict(cv::Mat* img, std::tuple* result); // pre & post parameters @@ -47,15 +43,10 @@ class FASTDEPLOY_DECL Classifier : public FastDeployModel { bool is_scale; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 bool Preprocess(Mat* img, FDTensor* output); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor bool Postprocess(FDTensor& infer_result, std::tuple* result); }; diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.cc b/fastdeploy/vision/ocr/ppocr/dbdetector.cc index 5ed3bae2c..09309aec1 100644 --- a/fastdeploy/vision/ocr/ppocr/dbdetector.cc +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.cc @@ -27,8 +27,8 @@ DBDetector::DBDetector(const std::string& model_file, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT, - Backend::OPENVINO}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -91,7 +91,6 @@ void OcrDetectorResizeImage(Mat* img, int max_size_len, float* ratio_h, *ratio_w = float(resize_w) / float(w); } -//预处理 bool DBDetector::Preprocess( Mat* mat, FDTensor* output, std::map>* im_info) { @@ -111,7 +110,6 @@ bool DBDetector::Preprocess( return true; } -//后处理 bool DBDetector::Postprocess( FDTensor& infer_result, std::vector>* boxes_result, const std::map>& im_info) { @@ -166,7 +164,6 @@ bool DBDetector::Postprocess( return true; } -//预测 bool DBDetector::Predict(cv::Mat* img, std::vector>* boxes_result) { Mat mat(*img); diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.h b/fastdeploy/vision/ocr/ppocr/dbdetector.h index 316035aa6..ad80c1329 100644 --- a/fastdeploy/vision/ocr/ppocr/dbdetector.h +++ b/fastdeploy/vision/ocr/ppocr/dbdetector.h @@ -30,10 +30,8 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE); - // 定义模型的名称 std::string ModelName() const { return "ppocr/ocr_det"; } - // 模型预测接口,即用户调用的接口 virtual bool Predict(cv::Mat* im, std::vector>* boxes_result); @@ -54,20 +52,15 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel { bool is_scale; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 bool Preprocess(Mat* mat, FDTensor* outputs, std::map>* im_info); - // 后端推理结果后处理,输出给用户 bool Postprocess(FDTensor& infer_result, std::vector>* boxes_result, const std::map>& im_info); - // OCR后处理类 PostProcessor post_processor_; }; diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.cc b/fastdeploy/vision/ocr/ppocr/recognizer.cc index 8202ece6b..f6ba5294a 100644 --- a/fastdeploy/vision/ocr/ppocr/recognizer.cc +++ b/fastdeploy/vision/ocr/ppocr/recognizer.cc @@ -45,8 +45,8 @@ Recognizer::Recognizer(const std::string& model_file, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { valid_cpu_backends = {Backend::ORT, - Backend::OPENVINO}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 + Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -56,7 +56,6 @@ Recognizer::Recognizer(const std::string& model_file, runtime_option.model_format = model_format; runtime_option.model_file = model_file; runtime_option.params_file = params_file; - // Recognizer在使用CPU推理,并把PaddleInference作为推理后端时,需要删除以下2个pass// runtime_option.DeletePaddleBackendPass("matmul_transpose_reshape_fuse_pass"); runtime_option.DeletePaddleBackendPass( "matmul_transpose_reshape_mkldnn_fuse_pass"); @@ -111,7 +110,6 @@ void OcrRecognizerResizeImage(Mat* mat, const float& wh_ratio, Pad::Run(mat, 0, 0, 0, int(imgW - mat->Width()), value); } -//预处理 bool Recognizer::Preprocess(Mat* mat, FDTensor* output, const std::vector& rec_image_shape) { int imgH = rec_image_shape[1]; @@ -134,7 +132,6 @@ bool Recognizer::Preprocess(Mat* mat, FDTensor* output, return true; } -//后处理 bool Recognizer::Postprocess(FDTensor& infer_result, std::tuple* rec_result) { std::vector output_shape = infer_result.shape; @@ -174,7 +171,6 @@ bool Recognizer::Postprocess(FDTensor& infer_result, return true; } -//预测 bool Recognizer::Predict(cv::Mat* img, std::tuple* rec_result) { Mat mat(*img); diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.h b/fastdeploy/vision/ocr/ppocr/recognizer.h index a6fea9117..ebe99d1e8 100644 --- a/fastdeploy/vision/ocr/ppocr/recognizer.h +++ b/fastdeploy/vision/ocr/ppocr/recognizer.h @@ -25,17 +25,13 @@ namespace ocr { class FASTDEPLOY_DECL Recognizer : public FastDeployModel { public: Recognizer(); - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file Recognizer(const std::string& model_file, const std::string& params_file = "", const std::string& label_path = "", const RuntimeOption& custom_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE); - // 定义模型的名称 std::string ModelName() const { return "ppocr/ocr_rec"; } - // 模型预测接口,即用户调用的接口 virtual bool Predict(cv::Mat* img, std::tuple* rec_result); @@ -51,15 +47,11 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel { bool is_scale; private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 bool Initialize(); - // 输入图像预处理操作 bool Preprocess(Mat* img, FDTensor* outputs, const std::vector& rec_image_shape); - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor bool Postprocess(FDTensor& infer_result, std::tuple* rec_result); }; diff --git a/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc b/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc index c339932b9..1c1735dc4 100644 --- a/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc +++ b/fastdeploy/vision/ocr/ppocr/utils/get_rotate_crop_image.cc @@ -31,7 +31,6 @@ cv::Mat GetRotateCropImage(const cv::Mat& srcimage, tmp.push_back(box[2 * i + 1]); points.push_back(tmp); } - // box转points int x_collect[4] = {box[0], box[2], box[4], box[6]}; int y_collect[4] = {box[1], box[3], box[5], box[7]}; int left = int(*std::min_element(x_collect, x_collect + 4)); @@ -39,7 +38,6 @@ cv::Mat GetRotateCropImage(const cv::Mat& srcimage, int top = int(*std::min_element(y_collect, y_collect + 4)); int bottom = int(*std::max_element(y_collect, y_collect + 4)); - //得到rect矩形 cv::Mat img_crop; image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop); @@ -65,14 +63,12 @@ cv::Mat GetRotateCropImage(const cv::Mat& srcimage, pointsf[2] = cv::Point2f(points[2][0], points[2][1]); pointsf[3] = cv::Point2f(points[3][0], points[3][1]); - //透视变换矩阵 cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std); cv::Mat dst_img; cv::warpPerspective(img_crop, dst_img, M, cv::Size(img_crop_width, img_crop_height), cv::BORDER_REPLICATE); - //完成透视变换 if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) { cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth()); diff --git a/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc b/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc index 40869e063..02e435f76 100644 --- a/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc +++ b/fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.cc @@ -20,7 +20,6 @@ namespace fastdeploy { namespace vision { namespace ocr { -//获取轮廓区域 void PostProcessor::GetContourArea(const std::vector> &box, float unclip_ratio, float &distance) { int pts_num = 4; @@ -71,7 +70,6 @@ cv::RotatedRect PostProcessor::UnClip(std::vector> box, return res; } -//将图像的矩阵转换为float类型的array数组返回 float **PostProcessor::Mat2Vec(cv::Mat mat) { auto **array = new float *[mat.rows]; for (int i = 0; i < mat.rows; ++i) array[i] = new float[mat.cols]; @@ -84,8 +82,6 @@ float **PostProcessor::Mat2Vec(cv::Mat mat) { return array; } -//对点进行顺时针方向的排序(从左到右,从上到下) (order points -// clockwise[顺时针方向]) std::vector> PostProcessor::OrderPointsClockwise( std::vector> pts) { std::vector> box = pts; @@ -103,7 +99,6 @@ std::vector> PostProcessor::OrderPointsClockwise( return rect; } -//将图像的矩阵转换为float类型的vector数组返回 std::vector> PostProcessor::Mat2Vector(cv::Mat mat) { std::vector> img_vec; std::vector tmp; @@ -118,7 +113,6 @@ std::vector> PostProcessor::Mat2Vector(cv::Mat mat) { return img_vec; } -//判断元素为浮点数float的vector的精度,如果a中元素的精度不等于b中元素的精度,则返回false bool PostProcessor::XsortFp32(std::vector a, std::vector b) { if (a[0] != b[0]) return a[0] < b[0]; return false; @@ -248,8 +242,6 @@ float PostProcessor::BoxScoreFast(std::vector> box_array, return score; } -//这个应该是DB(差分二值化)相关的内容,方法从 Bitmap 图中获取检测框 -//涉及到box_thresh(低于这个阈值的boxs不予显示)和det_db_unclip_ratio(文本框扩张的系数,关系到文本框的大小) std::vector>> PostProcessor::BoxesFromBitmap( const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { diff --git a/fastdeploy/vision/utils/cosine_similarity.cc b/fastdeploy/vision/utils/cosine_similarity.cc index 70d4e31dd..20482ada9 100644 --- a/fastdeploy/vision/utils/cosine_similarity.cc +++ b/fastdeploy/vision/utils/cosine_similarity.cc @@ -20,7 +20,6 @@ namespace utils { float CosineSimilarity(const std::vector& a, const std::vector& b, bool normalized) { - // 计算余弦相似度 FDASSERT((a.size() == b.size()) && (a.size() != 0), "The size of a and b must be equal and >= 1."); size_t num_val = a.size(); diff --git a/fastdeploy/vision/visualize/matting_alpha.cc b/fastdeploy/vision/visualize/matting_alpha.cc index 03e003224..0c8bd3fa3 100644 --- a/fastdeploy/vision/visualize/matting_alpha.cc +++ b/fastdeploy/vision/visualize/matting_alpha.cc @@ -24,7 +24,6 @@ namespace vision { cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im, const MattingResult& result, bool remove_small_connected_area) { - // 只可视化alpha,fgr(前景)本身就是一张图 不需要可视化 FDASSERT((!im.empty()), "im can't be empty!"); FDASSERT((im.channels() == 3), "Only support 3 channels mat!"); @@ -33,7 +32,6 @@ cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im, int out_w = static_cast(result.shape[1]); int height = im.rows; int width = im.cols; - // alpha to cv::Mat && 避免resize等操作修改外部数据 std::vector alpha_copy; alpha_copy.assign(result.alpha.begin(), result.alpha.end()); float* alpha_ptr = static_cast(alpha_copy.data()); diff --git a/fastdeploy/vision/visualize/ocr.cc b/fastdeploy/vision/visualize/ocr.cc index d6dd96e25..b5dfa5571 100644 --- a/fastdeploy/vision/visualize/ocr.cc +++ b/fastdeploy/vision/visualize/ocr.cc @@ -24,11 +24,9 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { auto vis_im = im.clone(); for (int n = 0; n < ocr_result.boxes.size(); n++) { - //遍历每一个盒子 cv::Point rook_points[4]; for (int m = 0; m < 4; m++) { - //对每一个盒子 array rook_points[m] = cv::Point(int(ocr_result.boxes[n][m * 2]), int(ocr_result.boxes[n][m * 2 + 1])); } diff --git a/fastdeploy/vision/visualize/remove_small_connnected_area.cc b/fastdeploy/vision/visualize/remove_small_connnected_area.cc index c523c9987..c8ff0d3d3 100644 --- a/fastdeploy/vision/visualize/remove_small_connnected_area.cc +++ b/fastdeploy/vision/visualize/remove_small_connnected_area.cc @@ -23,8 +23,6 @@ namespace vision { cv::Mat Visualize::RemoveSmallConnectedArea(const cv::Mat& alpha_pred, float threshold) { - // 移除小的联通区域和噪点 开闭合形态学处理 - // 假设输入的是透明度alpha, 值域(0.,1.) cv::Mat gray, binary; alpha_pred.convertTo(gray, CV_8UC1, 255.f); cv::Mat alpha_pred_clone = alpha_pred.clone(); diff --git a/fastdeploy/vision/visualize/swap_background.cc b/fastdeploy/vision/visualize/swap_background.cc index e8ad04b77..25ff56892 100644 --- a/fastdeploy/vision/visualize/swap_background.cc +++ b/fastdeploy/vision/visualize/swap_background.cc @@ -25,7 +25,6 @@ cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background, const MattingResult& result, bool remove_small_connected_area) { - // 只可视化alpha,fgr(前景)本身就是一张图 不需要可视化 FDASSERT((!im.empty()), "Image can't be empty!"); FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); FDASSERT((!background.empty()), "Background image can't be empty!"); @@ -39,7 +38,6 @@ cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat& im, int width = im.cols; int bg_height = background.rows; int bg_width = background.cols; - // alpha to cv::Mat && 避免resize等操作修改外部数据 std::vector alpha_copy; alpha_copy.assign(result.alpha.begin(), result.alpha.end()); float* alpha_ptr = static_cast(alpha_copy.data()); @@ -76,9 +74,7 @@ cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat& im, return vis_img; } -// 对SegmentationResult做背景替换,由于分割模型可以预测多个类别,其中 -// background_label 表示预测为背景类的标签 -// 由于不同模型和数据集训练的背景类别标签可能不同,用户可以自己输入背景类对应的标签。 + cv::Mat Visualize::SwapBackgroundSegmentation( const cv::Mat& im, const cv::Mat& background, int background_label, const SegmentationResult& result) {