From ed19c759df9a1ad64f1d69bfa646acc01a1cc565 Mon Sep 17 00:00:00 2001
From: guxukai <44280887+GodIsBoom@users.noreply.github.com>
Date: Sun, 9 Apr 2023 10:38:18 +0800
Subject: [PATCH] [CVCUDA] Add CV-CUDA support in PaddleSeg (#1761)

* add cvcuda support in ppseg

* python and pybind

* add resize op, remove concat,std::move

* define resize op
---
 fastdeploy/vision/segmentation/ppseg/model.cc   |   3 +-
 .../vision/segmentation/ppseg/ppseg_pybind.cc   | 115 +++++++++++-------
 .../vision/segmentation/ppseg/preprocessor.cc   |  56 +++++----
 .../vision/segmentation/ppseg/preprocessor.h    |  29 +++--
 .../vision/segmentation/ppseg/__init__.py       |  21 ++--
 5 files changed, 128 insertions(+), 96 deletions(-)

diff --git a/fastdeploy/vision/segmentation/ppseg/model.cc b/fastdeploy/vision/segmentation/ppseg/model.cc
index 2f5d45c5f..9e62fd9bc 100755
--- a/fastdeploy/vision/segmentation/ppseg/model.cc
+++ b/fastdeploy/vision/segmentation/ppseg/model.cc
@@ -79,7 +79,8 @@ bool PaddleSegModel::BatchPredict(const std::vector<cv::Mat>& imgs,
   std::vector<FDMat> fd_images = WrapMat(imgs);
   // Record the shape of input images
   std::map<std::string, std::vector<std::array<int, 2>>> imgs_info;
-  if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &imgs_info)) {
+  preprocessor_.SetImgsInfo(&imgs_info);
+  if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) {
     FDERROR << "Failed to preprocess input data while using model:"
             << ModelName() << "." << std::endl;
     return false;
diff --git a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc
index 78c7c9ccc..19b38cf0a 100644
--- a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc
+++ b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc
@@ -15,44 +15,52 @@
 namespace fastdeploy {
 void BindPPSeg(pybind11::module& m) {
-  pybind11::class_<vision::segmentation::PaddleSegPreprocessor>( m, "PaddleSegPreprocessor")
+  pybind11::class_<vision::segmentation::PaddleSegPreprocessor>(m, "PaddleSegPreprocessor")
       .def(pybind11::init<std::string>())
       .def("run",
           [](vision::segmentation::PaddleSegPreprocessor& self,
              std::vector<pybind11::array>& im_list) {
            std::vector<vision::FDMat> images;
            for (size_t i = 0; i < im_list.size(); ++i) {
-             images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
-           }
+             images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+           }
            // Record the shape of input images
            std::map<std::string, std::vector<std::array<int, 2>>> imgs_info;
            std::vector<FDTensor> outputs;
-           if (!self.Run(&images, &outputs, &imgs_info)) {
-             throw std::runtime_error("Failed to preprocess the input data in PaddleSegPreprocessor.");
+           self.SetImgsInfo(&imgs_info);
+           if (!self.Run(&images, &outputs)) {
+             throw std::runtime_error(
+                 "Failed to preprocess the input data in "
+                 "PaddleSegPreprocessor.");
           }
           for (size_t i = 0; i < outputs.size(); ++i) {
             outputs[i].StopSharing();
           }
-          return make_pair(outputs, imgs_info);;
+          return make_pair(outputs, imgs_info);
+          ;
         })
-      .def("disable_normalize", [](vision::segmentation::PaddleSegPreprocessor& self) {
-        self.DisableNormalize();
-      })
-      .def("disable_permute", [](vision::segmentation::PaddleSegPreprocessor& self) {
-        self.DisablePermute();
-      })
-      .def_property("is_vertical_screen",
-                    &vision::segmentation::PaddleSegPreprocessor::GetIsVerticalScreen,
-                    &vision::segmentation::PaddleSegPreprocessor::SetIsVerticalScreen);
+      .def("disable_normalize",
+           [](vision::segmentation::PaddleSegPreprocessor& self) {
+             self.DisableNormalize();
+           })
+      .def("disable_permute",
+           [](vision::segmentation::PaddleSegPreprocessor& self) {
+             self.DisablePermute();
+           })
+      .def_property(
+          "is_vertical_screen",
+          &vision::segmentation::PaddleSegPreprocessor::GetIsVerticalScreen,
+          &vision::segmentation::PaddleSegPreprocessor::SetIsVerticalScreen);
 
   pybind11::class_<vision::segmentation::PaddleSegModel, FastDeployModel>(
       m, "PaddleSegModel")
       .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
                           ModelFormat>())
-      .def("clone", [](vision::segmentation::PaddleSegModel& self) {
-        return self.Clone();
-      })
+      .def("clone",
+           [](vision::segmentation::PaddleSegModel& self) {
+             return self.Clone();
+           })
       .def("predict",
            [](vision::segmentation::PaddleSegModel& self,
               pybind11::array& data) {
@@ -62,48 +70,61 @@ void BindPPSeg(pybind11::module& m) {
             return res;
           })
       .def("batch_predict",
-          [](vision::segmentation::PaddleSegModel& self, std::vector<pybind11::array>& data) {
+           [](vision::segmentation::PaddleSegModel& self,
+              std::vector<pybind11::array>& data) {
            std::vector<cv::Mat> images;
            for (size_t i = 0; i < data.size(); ++i) {
-             images.push_back(PyArrayToCvMat(data[i]));
+             images.push_back(PyArrayToCvMat(data[i]));
            }
            std::vector<vision::SegmentationResult> results;
            self.BatchPredict(images, &results);
            return results;
          })
-      .def_property_readonly("preprocessor", &vision::segmentation::PaddleSegModel::GetPreprocessor)
-      .def_property_readonly("postprocessor", &vision::segmentation::PaddleSegModel::GetPostprocessor);
+      .def_property_readonly(
+          "preprocessor",
+          &vision::segmentation::PaddleSegModel::GetPreprocessor)
+      .def_property_readonly(
+          "postprocessor",
+          &vision::segmentation::PaddleSegModel::GetPostprocessor);
 
   pybind11::class_<vision::segmentation::PaddleSegPostprocessor>(
       m, "PaddleSegPostprocessor")
       .def(pybind11::init<std::string>())
-      .def("run", 
-           [](vision::segmentation::PaddleSegPostprocessor& self, 
+      .def("run",
+           [](vision::segmentation::PaddleSegPostprocessor& self,
              std::vector<FDTensor>& inputs,
-             const std::map<std::string, std::vector<std::array<int, 2>>>& imgs_info) {
-            std::vector<vision::SegmentationResult> results;
-            if (!self.Run(inputs, &results, imgs_info)) {
-              throw std::runtime_error("Failed to postprocess the runtime result in PaddleSegPostprocessor.");
-            }
-            return results;
-          })
+              const std::map<std::string, std::vector<std::array<int, 2>>>&
+                  imgs_info) {
+             std::vector<vision::SegmentationResult> results;
+             if (!self.Run(inputs, &results, imgs_info)) {
+               throw std::runtime_error(
+                   "Failed to postprocess the runtime result in "
+                   "PaddleSegPostprocessor.");
+             }
+             return results;
+           })
       .def("run",
           [](vision::segmentation::PaddleSegPostprocessor& self,
             std::vector<pybind11::array>& input_array,
-             const std::map<std::string, std::vector<std::array<int, 2>>>& imgs_info) {
-            std::vector<vision::SegmentationResult> results;
-            std::vector<FDTensor> inputs;
-            PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
-            if (!self.Run(inputs, &results, imgs_info)) {
-              throw std::runtime_error("Failed to postprocess the runtime result in PaddleSegPostprocessor.");
-            }
-            return results;
-          })
-      .def_property("apply_softmax",
-                    &vision::segmentation::PaddleSegPostprocessor::GetApplySoftmax,
-                    &vision::segmentation::PaddleSegPostprocessor::SetApplySoftmax)
-      .def_property("store_score_map",
-                    &vision::segmentation::PaddleSegPostprocessor::GetStoreScoreMap,
-                    &vision::segmentation::PaddleSegPostprocessor::SetStoreScoreMap);
+              const std::map<std::string, std::vector<std::array<int, 2>>>&
+                  imgs_info) {
+             std::vector<vision::SegmentationResult> results;
+             std::vector<FDTensor> inputs;
+             PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
+             if (!self.Run(inputs, &results, imgs_info)) {
+               throw std::runtime_error(
+                   "Failed to postprocess the runtime result in "
+                   "PaddleSegPostprocessor.");
+             }
+             return results;
+           })
+      .def_property(
+          "apply_softmax",
+          &vision::segmentation::PaddleSegPostprocessor::GetApplySoftmax,
+          &vision::segmentation::PaddleSegPostprocessor::SetApplySoftmax)
+      .def_property(
+          "store_score_map",
+          &vision::segmentation::PaddleSegPostprocessor::GetStoreScoreMap,
+          &vision::segmentation::PaddleSegPostprocessor::SetStoreScoreMap);
 }
 }  // namespace fastdeploy
diff --git a/fastdeploy/vision/segmentation/ppseg/preprocessor.cc b/fastdeploy/vision/segmentation/ppseg/preprocessor.cc
index 92b037895..a2586cfce 100644
--- a/fastdeploy/vision/segmentation/ppseg/preprocessor.cc
+++ b/fastdeploy/vision/segmentation/ppseg/preprocessor.cc
@@ -21,7 +21,8 @@ namespace segmentation {
 PaddleSegPreprocessor::PaddleSegPreprocessor(const std::string& config_file) {
   this->config_file_ = config_file;
-  FDASSERT(BuildPreprocessPipelineFromConfig(), "Failed to create PaddleSegPreprocessor.");
+  FDASSERT(BuildPreprocessPipelineFromConfig(),
+           "Failed to create PaddleSegPreprocessor.");
   initialized_ = true;
 }
 
@@ -35,7 +36,7 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() {
     FDERROR << "Failed to load yaml file " << config_file_
             << ", maybe you should check this file." << std::endl;
     return false;
-  } 
+  }
 
   if (cfg["Deploy"]["transforms"]) {
     auto preprocess_cfg = cfg["Deploy"]["transforms"];
@@ -76,7 +77,7 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() {
     if (input_height != -1 && input_width != -1 && !is_contain_resize_op_) {
       is_contain_resize_op_ = true;
       processors_.insert(processors_.begin(),
-                         std::make_shared<Resize>(input_width, input_height));
+                          std::make_shared<Resize>(input_width, input_height));
     }
   }
   if (!disable_permute_) {
@@ -88,22 +89,24 @@ bool PaddleSegPreprocessor::BuildPreprocessPipelineFromConfig() {
   return true;
 }
 
-bool PaddleSegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, std::map<std::string, std::vector<std::array<int, 2>>>* imgs_info) {
-
+bool PaddleSegPreprocessor::Apply(FDMatBatch* image_batch,
+                                  std::vector<FDTensor>* outputs) {
+  std::vector<FDMat>* images = image_batch->mats;
   if (!initialized_) {
     FDERROR << "The preprocessor is not initialized." << std::endl;
     return false;
   }
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   std::vector<std::array<int, 2>> shape_info;
   for (const auto& image : *images) {
-    shape_info.push_back({static_cast<int>(image.Height()),
-                          static_cast<int>(image.Width())});
+    shape_info.push_back(
+        {static_cast<int>(image.Height()), static_cast<int>(image.Width())});
   }
-  (*imgs_info)["shape_info"] = shape_info;
+  (*imgs_info_)["shape_info"] = shape_info;
   for (size_t i = 0; i < processors_.size(); ++i) {
     if (processors_[i]->Name() == "Resize") {
       auto processor = dynamic_cast<Resize*>(processors_[i].get());
@@ -123,13 +126,17 @@ bool PaddleSegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
   if (img_num > 1) {
     int max_width = 0;
-    int max_height = 0; 
+    int max_height = 0;
     for (size_t i = 0; i < img_num; ++i) {
       max_width = std::max(max_width, ((*images)[i]).Width());
       max_height = std::max(max_height, ((*images)[i]).Height());
     }
+    pre_resize_op_->SetWidthAndHeight(max_width, max_height);
     for (size_t i = 0; i < img_num; ++i) {
-      Resize::Run(&(*images)[i], max_width, max_height);
+      if (!(*pre_resize_op_)(&(*images)[i])) {
+        FDERROR << "Failed to batch resize max_width and max_height"
+                << std::endl;
+      }
     }
   }
   for (size_t i = 0; i < img_num; ++i) {
@@ -142,32 +149,29 @@ bool PaddleSegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
   outputs->resize(1);
-  // Concat all the preprocessed data to a batch tensor
-  std::vector<FDTensor> tensors(img_num);
-  for (size_t i = 0; i < img_num; ++i) {
-    (*images)[i].ShareWithTensor(&(tensors[i]));
-    tensors[i].ExpandDim(0);
-  }
-  if (tensors.size() == 1) {
-    (*outputs)[0] = std::move(tensors[0]);
-  } else {
-    function::Concat(tensors, &((*outputs)[0]), 0);
-  }
+  FDTensor* tensor = image_batch->Tensor();
+  (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(),
+                                tensor->Data(), tensor->device,
+                                tensor->device_id);
   return true;
 }
 
 void PaddleSegPreprocessor::DisableNormalize() {
   this->disable_normalize_ = true;
-  // the DisableNormalize function will be invalid if the configuration file is loaded during preprocessing
+  // the DisableNormalize function will be invalid if the configuration file is
+  // loaded during preprocessing
   if (!BuildPreprocessPipelineFromConfig()) {
-    FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl;
+    FDERROR << "Failed to build preprocess pipeline from configuration file."
+            << std::endl;
   }
 }
 
 void PaddleSegPreprocessor::DisablePermute() {
   this->disable_permute_ = true;
-  // the DisablePermute function will be invalid if the configuration file is loaded during preprocessing
+  // the DisablePermute function will be invalid if the configuration file is
+  // loaded during preprocessing
   if (!BuildPreprocessPipelineFromConfig()) {
-    FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl;
+    FDERROR << "Failed to build preprocess pipeline from configuration file."
+            << std::endl;
   }
 }
 }  // namespace segmentation
diff --git a/fastdeploy/vision/segmentation/ppseg/preprocessor.h b/fastdeploy/vision/segmentation/ppseg/preprocessor.h
index 1b27863e4..b4e89ae84 100644
--- a/fastdeploy/vision/segmentation/ppseg/preprocessor.h
+++ b/fastdeploy/vision/segmentation/ppseg/preprocessor.h
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
+#include "fastdeploy/vision/common/processors/manager.h"
 #include "fastdeploy/vision/common/processors/transform.h"
 #include "fastdeploy/vision/common/result.h"
 
@@ -20,7 +21,7 @@ namespace vision {
 namespace segmentation {
 /*! @brief Preprocessor object for PaddleSeg serials model. */
-class FASTDEPLOY_DECL PaddleSegPreprocessor {
+class FASTDEPLOY_DECL PaddleSegPreprocessor : public ProcessorManager {
  public:
   /** \brief Create a preprocessor instance for PaddleSeg serials model
    *
@@ -28,17 +29,16 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor {
    */
   explicit PaddleSegPreprocessor(const std::string& config_file);
 
-  /** \brief Process the input image and prepare input tensors for runtime
+  /** \brief Implement the virtual function of ProcessorManager, Apply() is the
+   * body of Run(). Apply() contains the main logic of preprocessing, Run() is
+   * called by users to execute preprocessing
    *
-   * \param[in] images The input image data list, all the elements are returned by cv::imread()
+   * \param[in] image_batch The input image batch
    * \param[in] outputs The output tensors which will feed in runtime
-   * \param[in] imgs_info The original input images shape info map, key is "shape_info", value is vector<array<int, 2>> a{{height, width}}
   * \return true if the preprocess successed, otherwise false
   */
-  virtual bool Run(
-      std::vector<FDMat>* images,
-      std::vector<FDTensor>* outputs,
-      std::map<std::string, std::vector<std::array<int, 2>>>* imgs_info);
+  virtual bool Apply(FDMatBatch* image_batch,
+                     std::vector<FDTensor>* outputs);
 
   /// Get is_vertical_screen property of PP-HumanSeg model, default is false
   bool GetIsVerticalScreen() const {
@@ -54,6 +54,15 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor {
   void DisableNormalize();
   /// This function will disable hwc2chw in preprocessing step.
   void DisablePermute();
+  /// This function will set imgs_info_ in PaddleSegPreprocessor
+  void SetImgsInfo(
+      std::map<std::string, std::vector<std::array<int, 2>>>* imgs_info) {
+    imgs_info_ = imgs_info;
+  }
+  /// This function will get imgs_info_ in PaddleSegPreprocessor
+  std::map<std::string, std::vector<std::array<int, 2>>>* GetImgsInfo() {
+    return imgs_info_;
+  }
 
  private:
  virtual bool BuildPreprocessPipelineFromConfig();
@@ -72,6 +81,10 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor {
   bool is_contain_resize_op_ = false;
 
   bool initialized_ = false;
+
+  std::map<std::string, std::vector<std::array<int, 2>>>* imgs_info_;
+  std::shared_ptr<Resize> pre_resize_op_ =
+      std::make_shared<Resize>(0, 0);
 };
 
 }  // namespace segmentation
diff --git a/python/fastdeploy/vision/segmentation/ppseg/__init__.py b/python/fastdeploy/vision/segmentation/ppseg/__init__.py
index f0106a39a..d7616a5cd 100644
--- a/python/fastdeploy/vision/segmentation/ppseg/__init__.py
+++ b/python/fastdeploy/vision/segmentation/ppseg/__init__.py
@@ -16,6 +16,7 @@ from __future__ import absolute_import
 import logging
 from .... import FastDeployModel, ModelFormat
 from .... import c_lib_wrap as C
+from ...common import ProcessorManager
 
 
 class PaddleSegModel(FastDeployModel):
@@ -87,34 +88,26 @@ class PaddleSegModel(FastDeployModel):
         return self._model.postprocessor
 
 
-class PaddleSegPreprocessor:
+class PaddleSegPreprocessor(ProcessorManager):
     def __init__(self, config_file):
         """Create a preprocessor for PaddleSegModel from configuration file
 
         :param config_file: (str)Path of configuration file, e.g ppliteseg/deploy.yaml
         """
-        self._preprocessor = C.vision.segmentation.PaddleSegPreprocessor(
+        self._manager = C.vision.segmentation.PaddleSegPreprocessor(
             config_file)
 
-    def run(self, input_ims):
-        """Preprocess input images for PaddleSegModel
-
-        :param input_ims: (list of numpy.ndarray)The input image
-        :return: list of FDTensor
-        """
-        return self._preprocessor.run(input_ims)
-
     def disable_normalize(self):
         """
         This function will disable normalize in preprocessing step.
         """
-        self._preprocessor.disable_normalize()
+        self._manager.disable_normalize()
 
     def disable_permute(self):
         """
        This function will disable hwc2chw in preprocessing step.
        """
-        self._preprocessor.disable_permute()
+        self._manager.disable_permute()
 
     @property
     def is_vertical_screen(self):
@@ -122,7 +115,7 @@ class PaddleSegPreprocessor:
 
         :return: value of is_vertical_screen(bool)
         """
-        return self._preprocessor.is_vertical_screen
+        return self._manager.is_vertical_screen
 
     @is_vertical_screen.setter
     def is_vertical_screen(self, value):
@@ -133,7 +126,7 @@ class PaddleSegPreprocessor:
         assert isinstance(
             value,
             bool), "The value to set `is_vertical_screen` must be type of bool."
-        self._preprocessor.is_vertical_screen = value
+        self._manager.is_vertical_screen = value
 
 
 class PaddleSegPostprocessor:
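
Usage note (not part of the patch): after this change, preprocessing runs through ProcessorManager::Run(), which invokes the new Apply() once per batch, and the per-image shape map is handed over via SetImgsInfo()/GetImgsInfo() instead of being a Run() argument; the user-facing Python API stays the same. The sketch below is illustrative only, assuming a FastDeploy build that contains this patch and an exported PaddleSeg model; the file paths and the use_gpu() option are placeholders, not taken from this diff.

# Minimal usage sketch: drive the refactored preprocessor through PaddleSegModel.
import cv2
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_gpu(0)  # assumption: a GPU build, since CV-CUDA preprocessing targets CUDA inputs

model = fd.vision.segmentation.PaddleSegModel(
    "pp_liteseg/model.pdmodel",    # placeholder path
    "pp_liteseg/model.pdiparams",  # placeholder path
    "pp_liteseg/deploy.yaml",      # placeholder path
    runtime_option=option)

# Preprocessor options exposed by the bindings touched in this patch.
model.preprocessor.is_vertical_screen = False
# model.preprocessor.disable_normalize()  # optional, mirrors DisableNormalize()

imgs = [cv2.imread("demo_1.jpg"), cv2.imread("demo_2.jpg")]
results = model.batch_predict(imgs)  # BatchPredict() -> SetImgsInfo() + Run() internally
for res in results:
    print(res)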