diff --git a/examples/runtime/README.md b/examples/runtime/README.md index 849d6ecef..18651bd69 100644 --- a/examples/runtime/README.md +++ b/examples/runtime/README.md @@ -1,16 +1,16 @@ -# FastDeploy Runtime推理示例 +# FastDeploy Runtime examples -| 示例代码 | 编程语言 | 说明 | +| Example Code | Program Language | Description | | :------- | :------- | :---- | -| python/infer_paddle_paddle_inference.py | Python | paddle模型通过paddle inference在cpu/gpu上的推理 | -| python/infer_paddle_tensorrt.py | Python | paddle模型通过tensorrt在gpu上的推理 | -| python/infer_paddle_openvino.py | Python | paddle模型通过openvino在cpu上的推理 | -| python/infer_paddle_onnxruntime.py | Python | paddle模型通过onnx runtime在cpu/gpu上的推理 | -| python/infer_onnx_openvino.py | Python | onnx模型通过openvino在cpu上的推理 | -| python/infer_onnx_tensorrt.py | Python | onnx模型通过tensorrt在gpu上的推理 | -| cpp/infer_paddle_paddle_inference.cc | C++ | paddle模型通过paddle inference在cpu/gpu上的推理 | -| cpp/infer_paddle_tensorrt.cc | C++ | paddle模型通过tensorrt在gpu上的推理 | -| cpp/infer_paddle_openvino.cc | C++ | paddle模型通过openvino在cpu上的推理 | -| cpp/infer_paddle_onnxruntime.cc | C++ | paddle模型通过onnx runtime在cpu/gpu上的推理 | -| cpp/infer_onnx_openvino.cc | C++ | onnx模型通过openvino在cpu上的推理 | -| cpp/infer_onnx_tensorrt.cc | C++ | onnx模型通过tensorrt在gpu上的推理 | +| python/infer_paddle_paddle_inference.py | Python | Deploy Paddle model with Paddle Inference(CPU/GPU) | +| python/infer_paddle_tensorrt.py | Python | Deploy Paddle model with TensorRT(GPU) | +| python/infer_paddle_openvino.py | Python | Deploy Paddle model with OpenVINO(CPU) | +| python/infer_paddle_onnxruntime.py | Python | Deploy Paddle model with ONNX Runtime(CPU/GPU) | +| python/infer_onnx_openvino.py | Python | Deploy ONNX model with OpenVINO(CPU) | +| python/infer_onnx_tensorrt.py | Python | Deploy ONNX model with TensorRT(GPU) | +| cpp/infer_paddle_paddle_inference.cc | C++ | Deploy Paddle model with Paddle Inference(CPU/GPU) | +| cpp/infer_paddle_tensorrt.cc | C++ | Deploy Paddle model with TensorRT(GPU) | +| 
cpp/infer_paddle_openvino.cc | C++ | Deploy Paddle model with OpenVINO(CPU) | +| cpp/infer_paddle_onnxruntime.cc | C++ | Deploy Paddle model with ONNX Runtime(CPU/GPU) | +| cpp/infer_onnx_openvino.cc | C++ | Deploy ONNX model with OpenVINO(CPU) | +| cpp/infer_onnx_tensorrt.cc | C++ | Deploy ONNX model with TensorRT(GPU) | diff --git a/fastdeploy/vision/common/processors/normalize.cc b/fastdeploy/vision/common/processors/normalize.cc index 726ba67a7..e16379ba5 100644 --- a/fastdeploy/vision/common/processors/normalize.cc +++ b/fastdeploy/vision/common/processors/normalize.cc @@ -19,7 +19,7 @@ namespace vision { Normalize::Normalize(const std::vector& mean, const std::vector& std, bool is_scale, const std::vector& min, - const std::vector& max) { + const std::vector& max, bool swap_rb) { FDASSERT(mean.size() == std.size(), "Normalize: requires the size of mean equal to the size of std."); std::vector mean_(mean.begin(), mean.end()); @@ -50,6 +50,7 @@ Normalize::Normalize(const std::vector& mean, alpha_.push_back(alpha); beta_.push_back(beta); } + swap_rb_ = swap_rb; } bool Normalize::ImplByOpenCV(Mat* mat) { @@ -57,6 +58,7 @@ bool Normalize::ImplByOpenCV(Mat* mat) { std::vector split_im; cv::split(*im, split_im); + if (swap_rb_) std::swap(split_im[0], split_im[2]); for (int c = 0; c < im->channels(); c++) { split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); } @@ -79,9 +81,13 @@ bool Normalize::ImplByFlyCV(Mat* mat) { std[i] = 1.0 / alpha_[i]; mean[i] = -1 * beta_[i] * std[i]; } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) std::swap(channel_reorder_index[0], channel_reorder_index[2]); + fcv::Mat new_im(im->width(), im->height(), fcv::FCVImageType::PKG_BGR_F32); - fcv::normalize_to_submean_to_reorder(*im, mean, std, std::vector(), + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, new_im, true); mat->SetMat(new_im); return true; @@ -91,8 +97,8 @@ bool Normalize::ImplByFlyCV(Mat* mat) { bool 
Normalize::Run(Mat* mat, const std::vector& mean, const std::vector& std, bool is_scale, const std::vector& min, - const std::vector& max, ProcLib lib) { - auto n = Normalize(mean, std, is_scale, min, max); + const std::vector& max, ProcLib lib, bool swap_rb) { + auto n = Normalize(mean, std, is_scale, min, max, swap_rb); return n(mat, lib); } diff --git a/fastdeploy/vision/common/processors/normalize.h b/fastdeploy/vision/common/processors/normalize.h index 515fcd7e6..c489207df 100644 --- a/fastdeploy/vision/common/processors/normalize.h +++ b/fastdeploy/vision/common/processors/normalize.h @@ -23,7 +23,8 @@ class FASTDEPLOY_DECL Normalize : public Processor { Normalize(const std::vector& mean, const std::vector& std, bool is_scale = true, const std::vector& min = std::vector(), - const std::vector& max = std::vector()); + const std::vector& max = std::vector(), + bool swap_rb = false); bool ImplByOpenCV(Mat* mat); #ifdef ENABLE_FLYCV bool ImplByFlyCV(Mat* mat); @@ -44,14 +45,23 @@ class FASTDEPLOY_DECL Normalize : public Processor { const std::vector& std, bool is_scale = true, const std::vector& min = std::vector(), const std::vector& max = std::vector(), - ProcLib lib = ProcLib::DEFAULT); + ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false); std::vector GetAlpha() const { return alpha_; } std::vector GetBeta() const { return beta_; } + bool GetSwapRB() { + return swap_rb_; + } + + void SetSwapRB(bool swap_rb) { + swap_rb_ = swap_rb; + } + private: std::vector alpha_; std::vector beta_; + bool swap_rb_; }; } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cc b/fastdeploy/vision/common/processors/normalize_and_permute.cc index cb78cc720..ca1565ec8 100644 --- a/fastdeploy/vision/common/processors/normalize_and_permute.cc +++ b/fastdeploy/vision/common/processors/normalize_and_permute.cc @@ -21,7 +21,8 @@ NormalizeAndPermute::NormalizeAndPermute(const std::vector& mean, const std::vector& std, 
bool is_scale, const std::vector& min, - const std::vector& max) { + const std::vector& max, + bool swap_rb) { FDASSERT(mean.size() == std.size(), "Normalize: requires the size of mean equal to the size of std."); std::vector mean_(mean.begin(), mean.end()); @@ -52,6 +53,7 @@ NormalizeAndPermute::NormalizeAndPermute(const std::vector& mean, alpha_.push_back(alpha); beta_.push_back(beta); } + swap_rb_ = swap_rb; } bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) { @@ -60,6 +62,7 @@ bool NormalizeAndPermute::ImplByOpenCV(Mat* mat) { int origin_h = im->rows; std::vector split_im; cv::split(*im, split_im); + if (swap_rb_) std::swap(split_im[0], split_im[2]); for (int c = 0; c < im->channels(); c++) { split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); } @@ -94,8 +97,12 @@ bool NormalizeAndPermute::ImplByFlyCV(Mat* mat) { std[i] = 1.0 / alpha_[i]; mean[i] = -1 * beta_[i] * std[i]; } + + std::vector channel_reorder_index = {0, 1, 2}; + if (swap_rb_) std::swap(channel_reorder_index[0], channel_reorder_index[2]); + fcv::Mat new_im; - fcv::normalize_to_submean_to_reorder(*im, mean, std, std::vector(), + fcv::normalize_to_submean_to_reorder(*im, mean, std, channel_reorder_index, new_im, false); mat->SetMat(new_im); mat->layout = Layout::CHW; @@ -106,8 +113,9 @@ bool NormalizeAndPermute::ImplByFlyCV(Mat* mat) { bool NormalizeAndPermute::Run(Mat* mat, const std::vector& mean, const std::vector& std, bool is_scale, const std::vector& min, - const std::vector& max, ProcLib lib) { - auto n = NormalizeAndPermute(mean, std, is_scale, min, max); + const std::vector& max, ProcLib lib, + bool swap_rb) { + auto n = NormalizeAndPermute(mean, std, is_scale, min, max, swap_rb); return n(mat, lib); } diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.h b/fastdeploy/vision/common/processors/normalize_and_permute.h index ec4766526..04715d9d7 100644 --- a/fastdeploy/vision/common/processors/normalize_and_permute.h +++ 
b/fastdeploy/vision/common/processors/normalize_and_permute.h @@ -23,7 +23,8 @@ class FASTDEPLOY_DECL NormalizeAndPermute : public Processor { NormalizeAndPermute(const std::vector& mean, const std::vector& std, bool is_scale = true, const std::vector& min = std::vector(), - const std::vector& max = std::vector()); + const std::vector& max = std::vector(), + bool swap_rb = false); bool ImplByOpenCV(Mat* mat); #ifdef ENABLE_FLYCV bool ImplByFlyCV(Mat* mat); @@ -44,7 +45,7 @@ class FASTDEPLOY_DECL NormalizeAndPermute : public Processor { const std::vector& std, bool is_scale = true, const std::vector& min = std::vector(), const std::vector& max = std::vector(), - ProcLib lib = ProcLib::DEFAULT); + ProcLib lib = ProcLib::DEFAULT, bool swap_rb = false); void SetAlpha(const std::vector& alpha) { alpha_.clear(); @@ -58,9 +59,18 @@ class FASTDEPLOY_DECL NormalizeAndPermute : public Processor { beta_.assign(beta.begin(), beta.end()); } + bool GetSwapRB() { + return swap_rb_; + } + + void SetSwapRB(bool swap_rb) { + swap_rb_ = swap_rb; + } + private: std::vector alpha_; std::vector beta_; + bool swap_rb_; }; } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/transform.cc b/fastdeploy/vision/common/processors/transform.cc index 8d440b9c6..d54a4bca4 100644 --- a/fastdeploy/vision/common/processors/transform.cc +++ b/fastdeploy/vision/common/processors/transform.cc @@ -95,10 +95,77 @@ void FuseNormalizeHWC2CHW( << std::endl; } +void FuseNormalizeColorConvert( + std::vector>* processors) { + // Fuse Normalize and BGR2RGB/RGB2BGR + int normalize_index = -1; + int color_convert_index = -1; + // If these middle processors are after BGR2RGB/RGB2BGR and before Normalize, + // we can still fuse Normalize and BGR2RGB/RGB2BGR + static std::unordered_set middle_processors( + {"Resize", "ResizeByShort", "ResizeByLong", "Crop", "CenterCrop", + "LimitByStride", "LimitShort", "Pad", "PadToSize", "StridePad", + "WarpAffine"}); + + for (size_t i = 
0; i < processors->size(); ++i) { + if ((*processors)[i]->Name() == "BGR2RGB" || + (*processors)[i]->Name() == "RGB2BGR") { + color_convert_index = i; + for (size_t j = color_convert_index + 1; j < processors->size(); ++j) { + if ((*processors)[j]->Name() == "Normalize" || + (*processors)[j]->Name() == "NormalizeAndPermute") { + normalize_index = j; + break; + } + } + if (normalize_index < 0) { + return; + } + for (size_t j = color_convert_index + 1; j < normalize_index; ++j) { + if (middle_processors.count((*processors)[j]->Name())) { + continue; + } + return; + } + } + } + + if (color_convert_index < 0) { + return; + } + + // Delete Color Space Convert + std::string color_processor_name = (*processors)[color_convert_index]->Name(); + processors->erase(processors->begin() + color_convert_index); + + // Toggle the swap_rb option of the Normalize processor + std::string normalize_processor_name = + (*processors)[normalize_index - 1]->Name(); + bool swap_rb; + if (normalize_processor_name == "Normalize") { + auto processor = dynamic_cast( + (*processors)[normalize_index - 1].get()); + swap_rb = processor->GetSwapRB(); + processor->SetSwapRB(!swap_rb); + } else if (normalize_processor_name == "NormalizeAndPermute") { + auto processor = dynamic_cast( + (*processors)[normalize_index - 1].get()); + swap_rb = processor->GetSwapRB(); + processor->SetSwapRB(!swap_rb); + } else { + FDASSERT(false, "Something wrong in FuseNormalizeColorConvert()."); + } + + FDINFO << color_processor_name << " and " << normalize_processor_name + << " are fused to " << normalize_processor_name + << " with swap_rb=" << !swap_rb << std::endl; +} + void FuseTransforms( std::vector>* processors) { FuseNormalizeCast(processors); FuseNormalizeHWC2CHW(processors); + FuseNormalizeColorConvert(processors); } diff --git a/fastdeploy/vision/common/processors/transform.h b/fastdeploy/vision/common/processors/transform.h index 53f7ffd63..2a914fff7 100644 --- a/fastdeploy/vision/common/processors/transform.h 
+++ b/fastdeploy/vision/common/processors/transform.h @@ -31,6 +31,7 @@ #include "fastdeploy/vision/common/processors/resize_by_short.h" #include "fastdeploy/vision/common/processors/stride_pad.h" #include "fastdeploy/vision/common/processors/warp_affine.h" +#include namespace fastdeploy { namespace vision { @@ -41,6 +42,9 @@ void FuseTransforms(std::vector>* processors); void FuseNormalizeCast(std::vector>* processors); // Fuse Normalize + HWC2CHW to NormalizeAndPermute void FuseNormalizeHWC2CHW(std::vector>* processors); +// Fuse Normalize + Color Convert +void FuseNormalizeColorConvert( + std::vector>* processors); } // namespace vision } // namespace fastdeploy