yolov5 servitization optimization (#262)

* yolov5 split pre and post process * yolov5 postprocess * yolov5 postprocess
2025-10-05 08:37:06 +08:00 · 2022-09-21 05:22:39 -05:00
parent 463ee0a088
commit a8e447f225
9 changed files with 194 additions and 77 deletions
--- a/fastdeploy/vision/detection/contrib/yolov5.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "fastdeploy/vision/detection/contrib/yolov5.h"
+
 #include "fastdeploy/utils/perf.h"
 #include "fastdeploy/vision/utils/utils.h"

@@ -74,14 +75,14 @@ YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,

 bool YOLOv5::Initialize() {
  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  max_wh = 7680.0;
-  multi_label = true;
+  size_ = {640, 640};
+  padding_value_ = {114.0, 114.0, 114.0};
+  is_mini_pad_ = false;
+  is_no_pad_ = false;
+  is_scale_up_ = false;
+  stride_ = 32;
+  max_wh_ = 7680.0;
+  multi_label_ = true;

  if (!InitRuntime()) {
    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
@@ -90,23 +91,34 @@ bool YOLOv5::Initialize() {
  // Check if the input shape is dynamic after Runtime already initialized,
  // Note that, We need to force is_mini_pad 'false' to keep static
  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
+  // TODO(qiuyanjun): remove
+  // is_dynamic_input_ = false;
+  // auto shape = InputInfoOfRuntime(0).shape;
+  // for (int i = 0; i < shape.size(); ++i) {
+  //   // if height or width is dynamic
+  //   if (i >= 2 && shape[i] <= 0) {
+  //     is_dynamic_input_ = true;
+  //     break;
+  //   }
+  // }
+  // if (!is_dynamic_input_) {
+  //   is_mini_pad_ = false;
+  // }
  return true;
 }

 bool YOLOv5::Preprocess(Mat* mat, FDTensor* output,
-                        std::map<std::string, std::array<float, 2>>* im_info) {
+                        std::map<std::string, std::array<float, 2>>* im_info,
+                        const std::vector<int>& size,
+                        const std::vector<float> padding_value,
+                        bool is_mini_pad, bool is_no_pad, bool is_scale_up,
+                        int stride, float max_wh, bool multi_label) {
+  // Record the shape of image and the shape of preprocessed image
+  (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
+                               static_cast<float>(mat->Width())};
+  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
+                                static_cast<float>(mat->Width())};
+
  // process after image load
  double ratio = (size[0] * 1.0) / std::max(static_cast<float>(mat->Height()),
                                            static_cast<float>(mat->Width()));
@@ -145,9 +157,11 @@ bool YOLOv5::Preprocess(Mat* mat, FDTensor* output,
 }

 bool YOLOv5::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
+    std::vector<FDTensor>& infer_results, DetectionResult* result,
    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold, bool multi_label) {
+    float conf_threshold, float nms_iou_threshold, bool multi_label,
+    float max_wh) {
+  auto& infer_result = infer_results[0];
  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
  result->Clear();
  if (multi_label) {
@@ -251,13 +265,9 @@ bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,

  std::map<std::string, std::array<float, 2>> im_info;

-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
+  if (!Preprocess(&mat, &input_tensors[0], &im_info, size_, padding_value_,
+                  is_mini_pad_, is_no_pad_, is_scale_up_, stride_, max_wh_,
+                  multi_label_)) {
    FDERROR << "Failed to preprocess input image." << std::endl;
    return false;
  }
@@ -278,8 +288,8 @@ bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
  TIMERECORD_START(2)
 #endif

-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold, multi_label)) {
+  if (!Postprocess(output_tensors, result, im_info, conf_threshold,
+                   nms_iou_threshold, multi_label_)) {
    FDERROR << "Failed to post process." << std::endl;
    return false;
  }
--- a/fastdeploy/vision/detection/contrib/yolov5.h
+++ b/fastdeploy/vision/detection/contrib/yolov5.h
@@ -41,38 +41,18 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel {
                       float conf_threshold = 0.25,
                       float nms_iou_threshold = 0.5);

-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS
-  float max_wh;
-  // for different strategies to get boxes when postprocessing
-  bool multi_label;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
  // 输入图像预处理操作
  // Mat为FastDeploy定义的数据结构
  // FDTensor为预处理后的Tensor数据，传给后端进行推理
  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* outputs,
-                  std::map<std::string, std::array<float, 2>>* im_info);
+  static bool Preprocess(Mat* mat, FDTensor* output,
+                         std::map<std::string, std::array<float, 2>>* im_info,
+                         const std::vector<int>& size = {640, 640},
+                         const std::vector<float> padding_value = {114.0, 114.0,
+                                                                   114.0},
+                         bool is_mini_pad = false, bool is_no_pad = false,
+                         bool is_scale_up = false, int stride = 32,
+                         float max_wh = 7680.0, bool multi_label = true);

  // 后端推理结果后处理，输出给用户
  // infer_result 为后端推理后的输出Tensor
@@ -81,17 +61,45 @@ class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel {
  // conf_threshold 后处理时过滤box的置信度阈值
  // nms_iou_threshold 后处理时NMS设定的iou阈值
  // multi_label 后处理时box选取是否采用多标签方式
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold,
-                   bool multi_label);
+  static bool Postprocess(
+      std::vector<FDTensor>& infer_results, DetectionResult* result,
+      const std::map<std::string, std::array<float, 2>>& im_info,
+      float conf_threshold, float nms_iou_threshold, bool multi_label,
+      float max_wh = 7680.0);
+
+  // 以下为模型在预测时的一些参数，基本是前后处理所需
+  // 用户在创建模型后，可根据模型的要求，以及自己的需求
+  // 对参数进行修改
+  // tuple of (width, height)
+  std::vector<int> size_;
+  // padding value, size should be same with Channels
+  std::vector<float> padding_value_;
+  // only pad to the minimum rectange which height and width is times of stride
+  bool is_mini_pad_;
+  // while is_mini_pad = false and is_no_pad = true, will resize the image to
+  // the set size
+  bool is_no_pad_;
+  // if is_scale_up is false, the input image only can be zoom out, the maximum
+  // resize scale cannot exceed 1.0
+  bool is_scale_up_;
+  // padding stride, for is_mini_pad
+  int stride_;
+  // for offseting the boxes by classes when using NMS
+  float max_wh_;
+  // for different strategies to get boxes when postprocessing
+  bool multi_label_;
+
+ private:
+  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
+  bool Initialize();

  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
  bool IsDynamicInput() const { return is_dynamic_input_; }

-  void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
-                 bool _auto, bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
+  static void LetterBox(Mat* mat, std::vector<int> size,
+                        std::vector<float> color, bool _auto,
+                        bool scale_fill = false, bool scale_up = true,
+                        int stride = 32);

  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
  // or not.)
--- a/fastdeploy/vision/detection/contrib/yolov5_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5_pybind.cc
@@ -26,13 +26,43 @@ void BindYOLOv5(pybind11::module& m) {
             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
             return res;
           })
-      .def_readwrite("size", &vision::detection::YOLOv5::size)
-      .def_readwrite("padding_value", &vision::detection::YOLOv5::padding_value)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up)
-      .def_readwrite("stride", &vision::detection::YOLOv5::stride)
-      .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh)
-      .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label);
+      .def_static("preprocess",
+                  [](pybind11::array& data, const std::vector<int>& size,
+                     const std::vector<float> padding_value, bool is_mini_pad,
+                     bool is_no_pad, bool is_scale_up, int stride, float max_wh,
+                     bool multi_label) {
+                    auto mat = PyArrayToCvMat(data);
+                    fastdeploy::vision::Mat fd_mat(mat);
+                    FDTensor output;
+                    std::map<std::string, std::array<float, 2>> im_info;
+                    vision::detection::YOLOv5::Preprocess(
+                        &fd_mat, &output, &im_info, size, padding_value,
+                        is_mini_pad, is_no_pad, is_scale_up, stride, max_wh,
+                        multi_label);
+                    return make_pair(TensorToPyArray(output), im_info);
+                  })
+      .def_static(
+          "postprocess",
+          [](std::vector<pybind11::array> infer_results,
+             const std::map<std::string, std::array<float, 2>>& im_info,
+             float conf_threshold, float nms_iou_threshold, bool multi_label,
+             float max_wh) {
+            std::vector<FDTensor> fd_infer_results(infer_results.size());
+            PyArrayToTensorList(infer_results, &fd_infer_results, true);
+            vision::DetectionResult result;
+            vision::detection::YOLOv5::Postprocess(
+                fd_infer_results, &result, im_info, conf_threshold,
+                nms_iou_threshold, multi_label, max_wh);
+            return result;
+          })
+      .def_readwrite("size", &vision::detection::YOLOv5::size_)
+      .def_readwrite("padding_value",
+                     &vision::detection::YOLOv5::padding_value_)
+      .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad_)
+      .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad_)
+      .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up_)
+      .def_readwrite("stride", &vision::detection::YOLOv5::stride_)
+      .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh_)
+      .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label_);
 }
 }  // namespace fastdeploy