Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy into develop

2025-10-10 19:10:20 +08:00 · 2022-11-07 14:11:02 +00:00
parent b314d66bb7 16266969a1
commit 21836f8af5
7 changed files with 131 additions and 26 deletions
--- a/examples/runtime/README.md
+++ b/examples/runtime/README.md
@@ -1,16 +1,16 @@
-# FastDeploy Runtime推理示例
+# FastDeploy Runtime examples

-| 示例代码 | 编程语言 | 说明 |
+| Example Code | Programming Language | Description |
 | :------- | :------- | :---- |
-| python/infer_paddle_paddle_inference.py | Python | paddle模型通过paddle inference在cpu/gpu上的推理 |
-| python/infer_paddle_tensorrt.py | Python | paddle模型通过tensorrt在gpu上的推理 |
-| python/infer_paddle_openvino.py | Python | paddle模型通过openvino在cpu上的推理 |
-| python/infer_paddle_onnxruntime.py | Python | paddle模型通过onnx runtime在cpu/gpu上的推理 |
-| python/infer_onnx_openvino.py | Python | onnx模型通过openvino在cpu上的推理 |
-| python/infer_onnx_tensorrt.py | Python | onnx模型通过tensorrt在gpu上的推理 |
-| cpp/infer_paddle_paddle_inference.cc | C++ | paddle模型通过paddle inference在cpu/gpu上的推理 |
-| cpp/infer_paddle_tensorrt.cc | C++ | paddle模型通过tensorrt在gpu上的推理 |
-| cpp/infer_paddle_openvino.cc | C++ | paddle模型通过openvino在cpu上的推理 |
-| cpp/infer_paddle_onnxruntime.cc | C++ | paddle模型通过onnx runtime在cpu/gpu上的推理 |
-| cpp/infer_onnx_openvino.cc | C++ | onnx模型通过openvino在cpu上的推理 |
-| cpp/infer_onnx_tensorrt.cc | C++ | onnx模型通过tensorrt在gpu上的推理 |
+| python/infer_paddle_paddle_inference.py | Python | Deploy Paddle model with Paddle Inference(CPU/GPU) |
+| python/infer_paddle_tensorrt.py | Python | Deploy Paddle model with TensorRT(GPU) |
+| python/infer_paddle_openvino.py | Python | Deploy Paddle model with OpenVINO(CPU)  |
+| python/infer_paddle_onnxruntime.py | Python | Deploy Paddle model with ONNX Runtime(CPU/GPU)  |
+| python/infer_onnx_openvino.py | Python | Deploy ONNX model with OpenVINO(CPU) |
+| python/infer_onnx_tensorrt.py | Python | Deploy ONNX model with TensorRT(GPU) |
+| cpp/infer_paddle_paddle_inference.cc | C++ | Deploy Paddle model with Paddle Inference(CPU/GPU) |
+| cpp/infer_paddle_tensorrt.cc | C++ | Deploy Paddle model with TensorRT(GPU) |
+| cpp/infer_paddle_openvino.cc | C++ | Deploy Paddle model with OpenVINO(CPU) |
+| cpp/infer_paddle_onnxruntime.cc | C++ | Deploy Paddle model with ONNX Runtime(CPU/GPU) |
+| cpp/infer_onnx_openvino.cc | C++ | Deploy ONNX model with OpenVINO(CPU) |
+| cpp/infer_onnx_tensorrt.cc | C++ | Deploy ONNX model with TensorRT(GPU) |
--- a/fastdeploy/vision/common/processors/normalize.cc
+++ b/fastdeploy/vision/common/processors/normalize.cc
@@ -19,7 +19,7 @@ namespace vision {
 Normalize::Normalize(const std::vector<float>& mean,
                     const std::vector<float>& std, bool is_scale,
                     const std::vector<float>& min,
-                     const std::vector<float>& max) {
+                     const std::vector<float>& max, bool swap_rb) {
  FDASSERT(mean.size() == std.size(),
           "Normalize: requires the size of mean equal to the size of std.");
  std::vector<double> mean_(mean.begin(), mean.end());
@@ -50,6 +50,7 @@ Normalize::Normalize(const std::vector<float>& mean,
    alpha_.push_back(alpha);
    beta_.push_back(beta);
  }
+  swap_rb_ = swap_rb;
 }

 bool Normalize::ImplByOpenCV(Mat* mat) {
@@ -57,6 +58,7 @@ bool Normalize::ImplByOpenCV(Mat* mat) {

  std::vector<cv::Mat> split_im;
  cv::split(*im, split_im);
+  if (swap_rb_) std::swap(split_im[0], split_im[2]);
  for (int c = 0; c < im->channels(); c++) {
    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
  }
@@ -79,9 +81,13 @@ bool Normalize::ImplByFlyCV(Mat* mat) {
    std[i] = 1.0 / alpha_[i];
    mean[i] = -1 * beta_[i] * std[i];
  }
+
+  std::vector<uint32_t> channel_reorder_index = {0, 1, 2};
+  if (swap_rb_) std::swap(channel_reorder_index[0], channel_reorder_index[2]);
+
  fcv::Mat new_im(im->width(), im->height(),
                  fcv::FCVImageType::PKG_BGR_F32);
-  fcv::normalize_to_submean_to_reorder(*im, mean, std, std::vector<uint32_t>(),
+  fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index,
                                       new_im, true);
  mat->SetMat(new_im);
  return true;
@@ -91,8 +97,8 @@ bool Normalize::ImplByFlyCV(Mat* mat) {
 bool Normalize::Run(Mat* mat, const std::vector<float>& mean,
                    const std::vector<float>& std, bool is_scale,
                    const std::vector<float>& min,
-                    const std::vector<float>& max, ProcLib lib) {
-  auto n = Normalize(mean, std, is_scale, min, max);
+                    const std::vector<float>& max, ProcLib lib, bool swap_rb) {
+  auto n = Normalize(mean, std, is_scale, min, max, swap_rb);
  return n(mat, lib);
 }

--- a/fastdeploy/vision/common/processors/normalize.h
+++ b/fastdeploy/vision/common/processors/normalize.h
@@ -23,7 +23,8 @@ class FASTDEPLOY_DECL Normalize : public Processor {
  Normalize(const std::vector<float>& mean, const std::vector<float>& std,
            bool is_scale = true,
            const std::vector<float>& min = std::vector<float>(),
-            const std::vector<float>& max = std::vector<float>());
+            const std::vector<float>& max = std::vector<float>(),
+            bool swap_rb = false);
  bool ImplByOpenCV(Mat* mat);
 #ifdef ENABLE_FLYCV
  bool ImplByFlyCV(Mat* mat);
@@ -44,14 +45,23 @@ class FASTDEPLOY_DECL Normalize : public Processor {
                  const std::vector<float>& std, bool is_scale = true,
                  const std::vector<float>& min = std::vector<float>(),
                  const std::vector<float>& max = std::vector<float>(),
-                  ProcLib lib = ProcLib::DEFAULT);
+                  ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false);

  std::vector<float> GetAlpha() const { return alpha_; }
  std::vector<float> GetBeta() const { return beta_; }

+  // Whether ImplByOpenCV/ImplByFlyCV exchange channels 0 and 2 (R/B) while
+  // normalizing. Const, like GetAlpha()/GetBeta(), so const objects can query.
+  bool GetSwapRB() const { return swap_rb_; }
+
+  // Turns the channel 0 <-> 2 (R/B) exchange done during normalization on
+  // or off.
+  void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; }
+
 private:
  std::vector<float> alpha_;
  std::vector<float> beta_;
+  bool swap_rb_;
 };
 }  // namespace vision
 }  // namespace fastdeploy
--- a/fastdeploy/vision/common/processors/normalize_and_permute.cc
+++ b/fastdeploy/vision/common/processors/normalize_and_permute.cc
@@ -21,7 +21,8 @@ NormalizeAndPermute::NormalizeAndPermute(const std::vector<float>& mean,
                                         const std::vector<float>& std,
                                         bool is_scale,
                                         const std::vector<float>& min,
-                                         const std::vector<float>& max) {
+                                         const std::vector<float>& max,
+                                         bool swap_rb) {
  FDASSERT(mean.size() == std.size(),
           "Normalize: requires the size of mean equal to the size of std.");
  std::vector<double> mean_(mean.begin(), mean.end());
@@ -52,6 +53,7 @@ NormalizeAndPermute::NormalizeAndPermute(const std::vector<float>& mean,
    alpha_.push_back(alpha);
    beta_.push_back(beta);
  }
+  swap_rb_ = swap_rb;
 }

 bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) {
@@ -60,6 +62,7 @@ bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) {
  int origin_h = im->rows;
  std::vector<cv::Mat> split_im;
  cv::split(*im, split_im);
+  if (swap_rb_) std::swap(split_im[0], split_im[2]);
  for (int c = 0; c < im->channels(); c++) {
    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
  }
@@ -94,8 +97,12 @@ bool NormalizeAndPermute::ImplByFlyCV(Mat* mat) {
    std[i] = 1.0 / alpha_[i];
    mean[i] = -1 * beta_[i] * std[i];
  }
+
+  std::vector<uint32_t> channel_reorder_index = {0, 1, 2};
+  if (swap_rb_) std::swap(channel_reorder_index[0], channel_reorder_index[2]);
+
  fcv::Mat new_im;
-  fcv::normalize_to_submean_to_reorder(*im, mean, std, std::vector<uint32_t>(),
+  fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index,
                                       new_im, false);
  mat->SetMat(new_im);
  mat->layout = Layout::CHW;
@@ -106,8 +113,9 @@ bool NormalizeAndPermute::ImplByFlyCV(Mat* mat) {
 bool NormalizeAndPermute::Run(Mat* mat, const std::vector<float>& mean,
                              const std::vector<float>& std, bool is_scale,
                              const std::vector<float>& min,
-                              const std::vector<float>& max, ProcLib lib) {
-  auto n = NormalizeAndPermute(mean, std, is_scale, min, max);
+                              const std::vector<float>& max, ProcLib lib,
+                              bool swap_rb) {
+  auto n = NormalizeAndPermute(mean, std, is_scale, min, max, swap_rb);
  return n(mat, lib);
 }

--- a/fastdeploy/vision/common/processors/normalize_and_permute.h
+++ b/fastdeploy/vision/common/processors/normalize_and_permute.h
@@ -23,7 +23,8 @@ class FASTDEPLOY_DECL NormalizeAndPermute : public Processor {
  NormalizeAndPermute(const std::vector<float>& mean,
                      const std::vector<float>& std, bool is_scale = true,
                      const std::vector<float>& min = std::vector<float>(),
-                      const std::vector<float>& max = std::vector<float>());
+                      const std::vector<float>& max = std::vector<float>(),
+                      bool swap_rb = false);
  bool ImplByOpenCV(Mat* mat);
 #ifdef ENABLE_FLYCV
  bool ImplByFlyCV(Mat* mat);
@@ -44,7 +45,7 @@ class FASTDEPLOY_DECL NormalizeAndPermute : public Processor {
                  const std::vector<float>& std, bool is_scale = true,
                  const std::vector<float>& min = std::vector<float>(),
                  const std::vector<float>& max = std::vector<float>(),
-                  ProcLib lib = ProcLib::DEFAULT);
+                  ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false);

  void SetAlpha(const std::vector<float>& alpha) {
    alpha_.clear();
@@ -58,9 +59,18 @@ class FASTDEPLOY_DECL NormalizeAndPermute : public Processor {
    beta_.assign(beta.begin(), beta.end());
  }

+  // Whether ImplByOpenCV/ImplByFlyCV exchange channels 0 and 2 (R/B) while
+  // normalizing. Const so it can be queried on a const NormalizeAndPermute.
+  bool GetSwapRB() const { return swap_rb_; }
+
+  // Turns the channel 0 <-> 2 (R/B) exchange done during normalization on
+  // or off.
+  void SetSwapRB(bool swap_rb) { swap_rb_ = swap_rb; }
+
 private:
  std::vector<float> alpha_;
  std::vector<float> beta_;
+  bool swap_rb_;
 };
 }  // namespace vision
 }  // namespace fastdeploy
--- a/fastdeploy/vision/common/processors/transform.cc
+++ b/fastdeploy/vision/common/processors/transform.cc
@@ -95,10 +95,77 @@ void FuseNormalizeHWC2CHW(
         << std::endl;
 }

+// Fuse one BGR2RGB/RGB2BGR processor into the Normalize/NormalizeAndPermute
+// that follows it: the color convert is erased from `processors` and the
+// normalize's swap_rb option is toggled instead. `processors` is left
+// untouched when the first color convert cannot be fused.
+void FuseNormalizeColorConvert(
+    std::vector<std::shared_ptr<Processor>>* processors) {
+  // Processors allowed between the color convert and the normalize without
+  // preventing the fusion.
+  static const std::unordered_set<std::string> middle_processors(
+      {"Resize", "ResizeByShort", "ResizeByLong", "Crop", "CenterCrop",
+       "LimitByStride", "LimitShort", "Pad", "PadToSize", "StridePad",
+       "WarpAffine"});
+
+  // Locate the first color-convert processor.
+  size_t color_convert_index = 0;
+  for (; color_convert_index < processors->size(); ++color_convert_index) {
+    const std::string name = (*processors)[color_convert_index]->Name();
+    if (name == "BGR2RGB" || name == "RGB2BGR") {
+      break;
+    }
+  }
+  if (color_convert_index >= processors->size()) {
+    return;
+  }
+
+  // Find the first Normalize/NormalizeAndPermute behind the color convert; the
+  // search never looks in front of it, so no stale index can be used.
+  size_t normalize_index = color_convert_index + 1;
+  for (; normalize_index < processors->size(); ++normalize_index) {
+    const std::string name = (*processors)[normalize_index]->Name();
+    if (name == "Normalize" || name == "NormalizeAndPermute") {
+      break;
+    }
+    if (middle_processors.count(name) == 0) {
+      return;  // An unsupported processor sits in between.
+    }
+  }
+  if (normalize_index >= processors->size()) {
+    return;
+  }
+
+  // Toggle (rather than set) swap_rb. Dispatching on the dynamic type avoids
+  // dereferencing a null pointer if a processor only shares the name.
+  Processor* target = (*processors)[normalize_index].get();
+  const std::string normalize_processor_name = target->Name();
+  bool swap_rb = false;
+  if (auto* normalize = dynamic_cast<Normalize*>(target)) {
+    swap_rb = normalize->GetSwapRB();
+    normalize->SetSwapRB(!swap_rb);
+  } else if (auto* fused = dynamic_cast<NormalizeAndPermute*>(target)) {
+    swap_rb = fused->GetSwapRB();
+    fused->SetSwapRB(!swap_rb);
+  } else {
+    FDASSERT(false, "Something wrong in FuseNormalizeColorConvert().");
+  }
+
+  // Delete Color Space Convert
+  const std::string color_processor_name =
+      (*processors)[color_convert_index]->Name();
+  processors->erase(processors->begin() + color_convert_index);
+
+  FDINFO << color_processor_name << " and " << normalize_processor_name
+         << " are fused to " << normalize_processor_name
+         << " with swap_rb=" << !swap_rb << std::endl;
+}
+
 void FuseTransforms(
    std::vector<std::shared_ptr<Processor>>* processors) {
  FuseNormalizeCast(processors);
  FuseNormalizeHWC2CHW(processors);
+  FuseNormalizeColorConvert(processors);
 }


--- a/fastdeploy/vision/common/processors/transform.h
+++ b/fastdeploy/vision/common/processors/transform.h
@@ -31,6 +31,7 @@
 #include "fastdeploy/vision/common/processors/resize_by_short.h"
 #include "fastdeploy/vision/common/processors/stride_pad.h"
 #include "fastdeploy/vision/common/processors/warp_affine.h"
+#include <unordered_set>

 namespace fastdeploy {
 namespace vision {
@@ -41,6 +42,9 @@ void FuseTransforms(std::vector<std::shared_ptr<Processor>>* processors);
 void FuseNormalizeCast(std::vector<std::shared_ptr<Processor>>* processors);
 // Fuse Normalize + HWC2CHW to NormalizeAndPermute
 void FuseNormalizeHWC2CHW(std::vector<std::shared_ptr<Processor>>* processors);
+// Fuse Normalize + Color Convert
+void FuseNormalizeColorConvert(
+    std::vector<std::shared_ptr<Processor>>* processors);

 }  // namespace vision
 }  // namespace fastdeploy