[Benchmark] Compare diff for OCR (#1415)

* avoid mem copy for cpp benchmark

* set CMAKE_BUILD_TYPE to Release

* Add SegmentationDiff

* change pointer to reference

* fixed bug

* cast uint8 to int32

* Add diff compare for OCR

* Add diff compare for OCR

* rm ppocr pipeline

* Add yolov5 diff compare

* Add yolov5 diff compare

* deal with comments

* deal with comments

* fixed bug

* fixed bug
Commit d3845eb4e1 (parent 0c664fd006), authored by WJJ1995 on 2023-02-23 18:57:39 +08:00, committed by GitHub.
38 changed files with 513 additions and 255 deletions.


@@ -13,7 +13,9 @@ add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
 add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc)
 add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc)
 add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc)
-add_executable(benchmark_ppocr ${PROJECT_SOURCE_DIR}/benchmark_ppocr.cc)
+add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
+add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
+add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)
 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
@@ -21,12 +23,16 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread)
-  target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags)
-  target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
 endif()


@@ -1,97 +0,0 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
// Only for ppocr
DEFINE_string(det_model, "", "Path of Detection model of PPOCR.");
DEFINE_string(cls_model, "", "Path of Classification model of PPOCR.");
DEFINE_string(rec_model, "", "Path of Recognization model of PPOCR.");
DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
DEFINE_string(image_rec, "", "Path of Recognization img file of PPOCR.");
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
auto im_rec = cv::imread(FLAGS_image_rec);
// Detection Model
auto det_model_file =
FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdmodel";
auto det_params_file =
FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdiparams";
// Classification Model
auto cls_model_file =
FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdmodel";
auto cls_params_file =
FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdiparams";
// Recognition Model
auto rec_model_file =
FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdmodel";
auto rec_params_file =
FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdiparams";
auto rec_label_file = FLAGS_rec_label_file;
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
auto det_option = option;
auto cls_option = option;
auto rec_option = option;
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
det_option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
{1, 3, 960, 960});
cls_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 1024});
rec_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 2304});
}
auto det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
auto cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
auto rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label_file, rec_option);
// Only for runtime
if (FLAGS_profile_mode == "runtime") {
std::vector<std::array<int, 8>> boxes_result;
std::cout << "====Detection model====" << std::endl;
BENCHMARK_MODEL(det_model, det_model.Predict(im, &boxes_result));
int32_t cls_label;
float cls_score;
std::cout << "====Classification model====" << std::endl;
BENCHMARK_MODEL(cls_model,
cls_model.Predict(im_rec, &cls_label, &cls_score));
std::string text;
float rec_score;
std::cout << "====Recognization model====" << std::endl;
BENCHMARK_MODEL(rec_model, rec_model.Predict(im_rec, &text, &rec_score));
}
auto model_ppocrv3 =
fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
fastdeploy::vision::OCRResult res;
if (FLAGS_profile_mode == "end2end") {
BENCHMARK_MODEL(model_ppocrv3, model_ppocrv3.Predict(im, &res))
}
auto vis_im = fastdeploy::vision::VisOcr(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
#endif
return 0;
}


@@ -0,0 +1,57 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
// Classification Model
auto cls_model_file = FLAGS_model + sep + "inference.pdmodel";
auto cls_params_file = FLAGS_model + sep + "inference.pdiparams";
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 1024});
}
auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, option);
int32_t res_label;
float res_score;
// Run once at least
model_ppocr_cls.Predict(im, &res_label, &res_score);
// 1. Test result diff
std::cout << "=============== Test result diff =================\n";
int32_t res_label_expect = 0;
float res_score_expect = 1.0;
// Calculate diff between two results.
auto ppocr_cls_label_diff = res_label - res_label_expect;
auto ppocr_cls_score_diff = res_score - res_score_expect;
std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl;
std::cout << "PPOCR Cls score diff: " << abs(ppocr_cls_score_diff)
<< std::endl;
BENCHMARK_MODEL(model_ppocr_cls,
model_ppocr_cls.Predict(im, &res_label, &res_score));
#endif
return 0;
}
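The cls benchmark above compares scalar outputs against hard-coded expectations (label 0, score 1.0) and simply prints the raw differences. A minimal standalone sketch of the same idea with an explicit pass/fail tolerance; CheckClsResult is a hypothetical helper, not FastDeploy API:

#include <cmath>
#include <cstdint>
#include <iostream>

// Hypothetical helper (not part of this commit): compare a predicted
// label/score pair against expected values, with a tolerance on the score.
static bool CheckClsResult(int32_t label, float score, int32_t expect_label,
                           float expect_score, float tol = 1e-4f) {
  if (label != expect_label) return false;
  return std::fabs(score - expect_score) <= tol;
}

int main() {
  // Expected values mirror the ones hard-coded in benchmark_ppocr_cls.cc.
  bool ok = CheckClsResult(/*label=*/0, /*score=*/0.99998f, 0, 1.0f, 1e-3f);
  std::cout << (ok ? "PASS" : "FAIL") << std::endl;
}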


@@ -0,0 +1,63 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
namespace vision = fastdeploy::vision;
namespace benchmark = fastdeploy::benchmark;
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
// Detection Model
auto det_model_file = FLAGS_model + sep + "inference.pdmodel";
auto det_params_file = FLAGS_model + sep + "inference.pdiparams";
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
{1, 3, 960, 960});
}
auto model_ppocr_det =
vision::ocr::DBDetector(det_model_file, det_params_file, option);
std::vector<std::array<int, 8>> res;
// Run once at least
model_ppocr_det.Predict(im, &res);
// 1. Test result diff
std::cout << "=============== Test result diff =================\n";
// Save result to -> disk.
std::string ppocr_det_result_path = "ppocr_det_result.txt";
benchmark::ResultManager::SaveOCRDetResult(res, ppocr_det_result_path);
// Load result from <- disk.
std::vector<std::array<int, 8>> res_loaded;
benchmark::ResultManager::LoadOCRDetResult(&res_loaded,
ppocr_det_result_path);
// Calculate diff between two results.
auto ppocr_det_diff =
benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean
<< ", max=" << ppocr_det_diff.boxes.max
<< ", min=" << ppocr_det_diff.boxes.min << std::endl;
BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res));
#endif
return 0;
}
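The det benchmark persists boxes through ResultManager::SaveOCRDetResult/LoadOCRDetResult (see the fastdeploy/benchmark/utils.cc hunks further down) and diffs the result against the reloaded copy. A self-contained sketch of that round trip, assuming a single "boxes:" line with comma-separated coordinates; the helpers below are stand-ins, and ':' / ',' are assumed values of FastDeploy's KEY_VALUE_SEP / VALUE_SEP:

#include <array>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using Boxes = std::vector<std::array<int, 8>>;

// Stand-in for ResultManager::SaveOCRDetResult: one "boxes:" line,
// 8 ints per quadrilateral, comma-separated, no trailing comma.
static void SaveBoxes(const Boxes& boxes, const std::string& path) {
  std::ofstream fs(path);
  fs << "boxes:";
  for (size_t i = 0; i < boxes.size(); ++i)
    for (int j = 0; j < 8; ++j)
      fs << boxes[i][j] << ((i + 1 == boxes.size() && j == 7) ? "" : ",");
  fs << "\n";
}

// Stand-in for ResultManager::LoadOCRDetResult: parse the line back.
static Boxes LoadBoxes(const std::string& path) {
  std::ifstream fs(path);
  std::string line;
  std::getline(fs, line);
  std::stringstream ss(line.substr(line.find(':') + 1));
  std::vector<int> vals;
  for (std::string tok; std::getline(ss, tok, ',');)
    vals.push_back(std::stoi(tok));
  Boxes boxes(vals.size() / 8);
  for (size_t i = 0; i < boxes.size(); ++i)
    for (int j = 0; j < 8; ++j) boxes[i][j] = vals[i * 8 + j];
  return boxes;
}

int main() {
  Boxes res = {{10, 20, 110, 20, 110, 60, 10, 60}};
  SaveBoxes(res, "ppocr_det_result.txt");
  Boxes loaded = LoadBoxes("ppocr_det_result.txt");
  std::cout << "round trip ok: " << (res == loaded) << std::endl;
}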


@@ -0,0 +1,59 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
// Recognition Model
auto rec_model_file = FLAGS_model + sep + "inference.pdmodel";
auto rec_params_file = FLAGS_model + sep + "inference.pdiparams";
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 2304});
}
auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, FLAGS_rec_label_file, option);
std::string text;
float rec_score;
// Run once at least
model_ppocr_rec.Predict(im, &text, &rec_score);
// 1. Test result diff
std::cout << "=============== Test result diff =================\n";
std::string text_expect = "上海斯格威铂尔大酒店";
float res_score_expect = 0.993308;
// Calculate diff between two results.
auto ppocr_rec_text_diff = text.compare(text_expect);
auto ppocr_rec_score_diff = rec_score - res_score_expect;
std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl;
std::cout << "PPOCR Rec score diff: " << abs(ppocr_rec_score_diff)
<< std::endl;
BENCHMARK_MODEL(model_ppocr_rec,
model_ppocr_rec.Predict(im, &text, &rec_score));
#endif
return 0;
}
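One caveat on the rec benchmark above: std::string::compare returns 0 only on an exact byte-for-byte match, and any nonzero value just encodes lexicographic ordering, so the printed "text diff" is a match/mismatch flag rather than an edit distance. A tiny illustration:

#include <iostream>
#include <string>

int main() {
  std::string got = "上海斯格威铂尔大酒店";
  std::string expect = "上海斯格威铂尔大酒店";
  // 0 means the UTF-8 byte sequences are identical; a nonzero value only
  // tells you which string sorts first, not how different they are.
  std::cout << "PPOCR Rec text diff: " << got.compare(expect) << std::endl;
}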

benchmark/cpp/benchmark_yolov5.cc (29 changes, Executable file → Normal file)

@@ -16,6 +16,9 @@
 #include "macros.h"
 #include "option.h"
+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
 int main(int argc, char* argv[]) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
   // Initialization
@@ -24,11 +27,29 @@ int main(int argc, char* argv[]) {
     return -1;
   }
   auto im = cv::imread(FLAGS_image);
-  auto model_yolov5 =
-      fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
-  fastdeploy::vision::DetectionResult res;
+  auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option);
+  vision::DetectionResult res;
+  // Run once at least
+  model_yolov5.Predict(im, &res);
+  // 1. Test result diff
+  std::cout << "=============== Test result diff =================\n";
+  // Save result to -> disk.
+  std::string det_result_path = "yolov5_result.txt";
+  benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
+  // Load result from <- disk.
+  vision::DetectionResult res_loaded;
+  benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
+  // Calculate diff between two results.
+  auto det_diff =
+      benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+  std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
+            << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
+            << std::endl;
+  std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
+            << ", max=" << det_diff.labels.max
+            << ", min=" << det_diff.labels.min << std::endl;
   BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
-  auto vis_im = fastdeploy::vision::VisDetection(im, res);
+  auto vis_im = vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 #endif
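The mean/max/min figures printed above come from CalculateStatisInfo<float> in fastdeploy/benchmark/utils.cc (shown below). A sketch of that reduction, under the assumption that it is a plain mean/max/min over the elementwise differences; the real helper takes a raw pointer, a count, and three out-parameters instead:

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

struct EvalStatisSketch { float mean, max, min; };

// Assumed behavior of CalculateStatisInfo<float>: simple mean/max/min.
static EvalStatisSketch ComputeStatis(const std::vector<float>& diff) {
  EvalStatisSketch s{0.f, 0.f, 0.f};
  if (diff.empty()) return s;
  s.mean = std::accumulate(diff.begin(), diff.end(), 0.f) / diff.size();
  s.max = *std::max_element(diff.begin(), diff.end());
  s.min = *std::min_element(diff.begin(), diff.end());
  return s;
}

int main() {
  auto s = ComputeStatis({0.1f, -0.2f, 0.05f});
  std::cout << "mean=" << s.mean << ", max=" << s.max << ", min=" << s.min
            << std::endl;
}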

fastdeploy/benchmark/utils.cc (73 changes, Normal file → Executable file)

@@ -474,6 +474,34 @@ bool ResultManager::SaveSegmentationResult(
   return true;
 }
+bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
+                                     const std::string& path) {
+  if (res.empty()) {
+    FDERROR << "OCRDetResult can not be empty!" << std::endl;
+    return false;
+  }
+  std::ofstream fs(path, std::ios::out);
+  if (!fs.is_open()) {
+    FDERROR << "Fail to open file:" << path << std::endl;
+    return false;
+  }
+  fs.precision(20);
+  // boxes
+  fs << "boxes" << KEY_VALUE_SEP;
+  for (int i = 0; i < res.size(); ++i) {
+    for (int j = 0; j < 8; ++j) {
+      if ((i == res.size() - 1) && (j == 7)) {
+        fs << res[i][j];
+      } else {
+        fs << res[i][j] << VALUE_SEP;
+      }
+    }
+  }
+  fs << "\n";
+  fs.close();
+  return true;
+}
 bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
                                         const std::string& path) {
   if (!CheckFileExists(path)) {
@@ -556,6 +584,26 @@ bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
   return true;
 }
+bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
+                                     const std::string& path) {
+  if (!CheckFileExists(path)) {
+    FDERROR << "Can't found file from" << path << std::endl;
+    return false;
+  }
+  auto lines = ReadLines(path);
+  std::map<std::string, std::vector<std::string>> data;
+  // boxes
+  data = SplitDataLine(lines[0]);
+  int boxes_num = data.begin()->second.size() / 8;
+  res->resize(boxes_num);
+  for (int i = 0; i < boxes_num; ++i) {
+    for (int j = 0; j < 8; ++j) {
+      (*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]);
+    }
+  }
+  return true;
+}
 DetectionDiff ResultManager::CalculateDiffStatis(
     const vision::DetectionResult& lhs, const vision::DetectionResult& rhs,
     const float& score_threshold) {
@@ -643,6 +691,31 @@ SegmentationDiff ResultManager::CalculateDiffStatis(
   return diff;
 }
+OCRDetDiff ResultManager::CalculateDiffStatis(
+    const std::vector<std::array<int, 8>>& lhs,
+    const std::vector<std::array<int, 8>>& rhs) {
+  const int boxes_nums = std::min(lhs.size(), rhs.size());
+  std::vector<std::array<int, 8>> lhs_sort = lhs;
+  std::vector<std::array<int, 8>> rhs_sort = rhs;
+  // lex sort by x(w) & y(h)
+  vision::utils::LexSortOCRDetResultByXY(&lhs_sort);
+  vision::utils::LexSortOCRDetResultByXY(&rhs_sort);
+  // get value diff
+  const int boxes_num = std::min(lhs_sort.size(), rhs_sort.size());
+  std::vector<float> boxes_diff;
+  for (int i = 0; i < boxes_num; ++i) {
+    for (int j = 0; j < 8; ++j) {
+      boxes_diff.push_back(lhs_sort[i][j] - rhs_sort[i][j]);
+    }
+  }
+  OCRDetDiff diff;
+  CalculateStatisInfo<float>(boxes_diff.data(), boxes_diff.size(),
+                             &(diff.boxes.mean), &(diff.boxes.max),
+                             &(diff.boxes.min));
+  return diff;
+}
 #endif  // ENABLE_VISION
 #endif  // ENABLE_BENCHMARK
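Note that CalculateDiffStatis sorts both box lists with LexSortOCRDetResultByXY before differencing, so backend-dependent box ordering does not show up as a spurious diff. A standalone sketch of such a lexicographic sort, assuming the key is the first vertex's y then x (FastDeploy's helper may use a different key):

#include <algorithm>
#include <array>
#include <vector>

// Assumed ordering: by y of the first vertex, then x; the real
// vision::utils::LexSortOCRDetResultByXY may differ in detail.
static void LexSortBoxes(std::vector<std::array<int, 8>>* boxes) {
  std::sort(boxes->begin(), boxes->end(),
            [](const std::array<int, 8>& a, const std::array<int, 8>& b) {
              if (a[1] != b[1]) return a[1] < b[1];  // y first
              return a[0] < b[0];                    // then x
            });
}

int main() {
  std::vector<std::array<int, 8>> boxes = {
      {50, 40, 90, 40, 90, 60, 50, 60}, {10, 20, 80, 20, 80, 45, 10, 45}};
  LexSortBoxes(&boxes);  // the box starting at y=20 now comes first
}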


@@ -122,6 +122,10 @@ struct FASTDEPLOY_DECL SegmentationDiff: public BaseDiff {
   EvalStatis labels;
 };
+struct FASTDEPLOY_DECL OCRDetDiff: public BaseDiff {
+  EvalStatis boxes;
+};
 #endif  // ENABLE_VISION
 #endif  // ENABLE_BENCHMARK
@@ -148,6 +152,10 @@ struct FASTDEPLOY_DECL ResultManager {
                                     const std::string& path);
   static bool LoadSegmentationResult(vision::SegmentationResult* res,
                                      const std::string& path);
+  static bool SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
+                               const std::string& path);
+  static bool LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
+                               const std::string& path);
   /// Calculate diff value between two basic results.
   static DetectionDiff CalculateDiffStatis(const vision::DetectionResult& lhs,
                                            const vision::DetectionResult& rhs,
@@ -157,6 +165,9 @@ struct FASTDEPLOY_DECL ResultManager {
   static SegmentationDiff CalculateDiffStatis(
       const vision::SegmentationResult& lhs,
       const vision::SegmentationResult& rhs);
+  static OCRDetDiff CalculateDiffStatis(
+      const std::vector<std::array<int, 8>>& lhs,
+      const std::vector<std::array<int, 8>>& rhs);
 #endif  // ENABLE_VISION
 #endif  // ENABLE_BENCHMARK
 };


@@ -13,23 +13,22 @@
 // limitations under the License.
 #include "fastdeploy/vision/classification/contrib/resnet.h"
-#include "fastdeploy/vision/utils/utils.h"
 #include "fastdeploy/utils/perf.h"
+#include "fastdeploy/vision/utils/utils.h"
 namespace fastdeploy {
 namespace vision {
 namespace classification {
-ResNet::ResNet(const std::string& model_file,
-               const std::string& params_file,
+ResNet::ResNet(const std::string& model_file, const std::string& params_file,
                const RuntimeOption& custom_option,
                const ModelFormat& model_format) {
   // In constructor, the 3 steps below are necessary.
   // 1. set the Backend 2. set RuntimeOption 3. call Initialize()
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER};
     valid_gpu_backends = {Backend::PDINFER};
@@ -42,7 +41,6 @@ ResNet::ResNet(const std::string& model_file,
 }
 bool ResNet::Initialize() {
   // In this function, the 3 steps below are necessary.
   // 1. assign values to the global variables 2. call InitRuntime()
@@ -57,14 +55,15 @@ bool ResNet::Initialize() {
   return true;
 }
 bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
-  // In this function, the preprocess need be implemented according to the original Repos,
-  // The result of preprocess has to be saved in FDTensor variable, because the input of Infer() need to be std::vector<FDTensor>.
-  // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into FDTensor variable.
-  if (mat->Height()!=size[0] || mat->Width()!=size[1]){
+  // In this function, the preprocess need be implemented according to the
+  // original Repos,
+  // The result of preprocess has to be saved in FDTensor variable, because the
+  // input of Infer() need to be std::vector<FDTensor>.
+  // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into
+  // FDTensor variable.
+  if (mat->Height() != size[0] || mat->Width() != size[1]) {
     int interp = cv::INTER_LINEAR;
     Resize::Run(mat, size[1], size[0], -1, -1, interp);
   }
@@ -75,20 +74,23 @@ bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
-bool ResNet::Postprocess(FDTensor& infer_result,
-                         ClassifyResult* result, int topk) {
-  // In this function, the postprocess need be implemented according to the original Repos,
-  // Finally the reslut of postprocess should be saved in ClassifyResult variable.
-  // 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult variable.
+bool ResNet::Postprocess(FDTensor& infer_result, ClassifyResult* result,
+                         int topk) {
+  // In this function, the postprocess need be implemented according to the
+  // original Repos,
+  // Finally the reslut of postprocess should be saved in ClassifyResult
+  // variable.
+  // 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult
+  // variable.
   int num_classes = infer_result.shape[1];
   function::Softmax(infer_result, &infer_result);
-  const float* infer_result_buffer = reinterpret_cast<float*>(infer_result.Data());
+  const float* infer_result_buffer =
+      reinterpret_cast<float*>(infer_result.Data());
   topk = std::min(num_classes, topk);
   result->label_ids =
       utils::TopKIndices(infer_result_buffer, num_classes, topk);
@@ -100,8 +102,8 @@ bool ResNet::Postprocess(FDTensor& infer_result,
 }
 bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
-  // In this function, the Preprocess(), Infer(), and Postprocess() are called sequentially.
+  // In this function, the Preprocess(), Infer(), and Postprocess() are called
+  // sequentially.
   Mat mat(*im);
   std::vector<FDTensor> processed_data(1);
@@ -128,7 +130,6 @@ bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
   return true;
 }
 }  // namespace classification
 }  // namespace vision
 }  // namespace fastdeploy
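Many of the comment fixes in this and the following files replace "reshape to n, h, w, c" with "n, c, h, w": the insert happens after HWC2CHW has already transposed the tensor to CHW, so prepending the batch dimension yields NCHW. A toy illustration of that shape bookkeeping:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Shape after HWC2CHW: {c, h, w}.
  std::vector<int64_t> shape = {3, 224, 224};
  // Prepending 1 gives {n, c, h, w}, which is what the corrected
  // comments in this commit now say.
  shape.insert(shape.begin(), 1);
  for (auto d : shape) std::cout << d << ' ';
  std::cout << std::endl;  // prints: 1 3 224 224
}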


@@ -20,18 +20,19 @@ namespace vision {
 namespace classification {
 YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() {
   size_ = {224, 224};  //{h,w}
 }
-bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5ClsPreprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
   // process after image load
   double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()),
                                              static_cast<float>(mat->Width()));
   // yolov5cls's preprocess steps
   // 1. CenterCrop
@@ -54,20 +55,22 @@ bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv5ClsPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5ClsPreprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -20,26 +20,27 @@ namespace vision {
 namespace detection {
 FastestDetPreprocessor::FastestDetPreprocessor() {
   size_ = {352, 352};  //{h,w}
 }
-bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool FastestDetPreprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
   // process after image load
   double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()),
                                              static_cast<float>(mat->Width()));
   // fastestdet's preprocess steps
   // 1. resize
   // 2. convert_and_permute(swap_rb=false)
-  Resize::Run(mat, size_[0], size_[1]); //resize
+  Resize::Run(mat, size_[0], size_[1]);  // resize
   std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
   std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  //convert to float and HWC2CHW
+  // convert to float and HWC2CHW
   ConvertAndPermute::Run(mat, alpha, beta, false);
   // Record output shape of preprocessed image
@@ -47,20 +48,22 @@ bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool FastestDetPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool FastestDetPreprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -117,8 +117,8 @@ NanoDetPlus::NanoDetPlus(const std::string& model_file,
                          const RuntimeOption& custom_option,
                          const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -182,7 +182,7 @@ bool NanoDetPlus::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }

fastdeploy/vision/detection/contrib/scaledyolov4.cc (6 changes, Executable file → Normal file)

@@ -62,8 +62,8 @@ ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file,
                            const RuntimeOption& custom_option,
                            const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER};
     valid_gpu_backends = {Backend::PDINFER};
@@ -144,7 +144,7 @@ bool ScaledYOLOv4::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }

fastdeploy/vision/detection/contrib/yolor.cc (6 changes, Executable file → Normal file)

@@ -61,8 +61,8 @@ YOLOR::YOLOR(const std::string& model_file, const std::string& params_file,
              const RuntimeOption& custom_option,
              const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER};
     valid_gpu_backends = {Backend::PDINFER};
@@ -142,7 +142,7 @@ bool YOLOR::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -64,8 +64,9 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) {
   }
 }
-bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5Preprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv5Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5Preprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -195,7 +195,7 @@ bool YOLOv5Lite::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -253,7 +253,7 @@ bool YOLOv5Lite::CudaPreprocess(
   output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;


@@ -64,8 +64,9 @@ void YOLOv5SegPreprocessor::LetterBox(FDMat* mat) {
   }
 }
-bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5SegPreprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv5SegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5SegPreprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -168,7 +168,7 @@ bool YOLOv6::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -226,7 +226,7 @@ bool YOLOv6::CudaPreprocess(
   output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;


@@ -64,8 +64,9 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) {
   }
 }
-bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv7Preprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv7Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv7Preprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -137,7 +137,7 @@ bool YOLOv7End2EndORT::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -235,7 +235,8 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result,
     return false;
   }
-  if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) {
+  if (!Postprocess(reused_output_tensors_[0], result, im_info,
+                   conf_threshold)) {
     FDERROR << "Failed to post process." << std::endl;
     return false;
   }


@@ -169,7 +169,7 @@ bool YOLOv7End2EndTRT::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -227,7 +227,7 @@ bool YOLOv7End2EndTRT::CudaPreprocess(
   output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;


@@ -83,7 +83,7 @@ bool YOLOv8Preprocessor::Preprocess(
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }


@@ -129,7 +129,7 @@ bool YOLOX::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -70,7 +70,7 @@ bool FaceLandmark1000::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -22,13 +22,12 @@ namespace vision {
 namespace facealign {
-PFLD::PFLD(const std::string& model_file,
-           const std::string& params_file,
+PFLD::PFLD(const std::string& model_file, const std::string& params_file,
            const RuntimeOption& custom_option,
            const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -71,12 +70,13 @@ bool PFLD::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
-bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
-                       const std::map<std::string, std::array<int, 2>>& im_info) {
+bool PFLD::Postprocess(
+    FDTensor& infer_result, FaceAlignmentResult* result,
+    const std::map<std::string, std::array<int, 2>>& im_info) {
   FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
   if (infer_result.dtype != FDDataType::FP32) {
     FDERROR << "Only support post process with float32 data." << std::endl;
@@ -84,8 +84,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
   }
   auto iter_in = im_info.find("input_shape");
-  FDASSERT(iter_in != im_info.end(),
-           "Cannot find input_shape from im_info.");
+  FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
   int in_h = iter_in->second[0];
   int in_w = iter_in->second[1];
@@ -97,8 +96,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
     x = std::min(std::max(0.f, x), 1.0f);
     y = std::min(std::max(0.f, y), 1.0f);
     // decode landmarks (default 106 landmarks)
-    result->landmarks.emplace_back(
-        std::array<float, 2>{x * in_w, y * in_h});
+    result->landmarks.emplace_back(std::array<float, 2>{x * in_w, y * in_h});
   }
   return true;


@@ -632,7 +632,7 @@ bool PIPNet::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -81,8 +81,8 @@ RetinaFace::RetinaFace(const std::string& model_file,
                        const RuntimeOption& custom_option,
                        const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -145,7 +145,7 @@ bool RetinaFace::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -27,7 +27,7 @@ UltraFace::UltraFace(const std::string& model_file,
                      const RuntimeOption& custom_option,
                      const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
@@ -90,7 +90,7 @@ bool UltraFace::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -64,8 +64,8 @@ YOLOv5Face::YOLOv5Face(const std::string& model_file,
                        const RuntimeOption& custom_option,
                        const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -115,11 +115,11 @@ bool YOLOv5Face::Preprocess(
   // process after image load
   float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
                          size[0] * 1.0f / static_cast<float>(mat->Width()));
 #ifndef __ANDROID__
   // Because of the low CPU performance on the Android device,
   // we decided to hide this extra resize. It won't make much
   // difference to the final result.
   if (std::fabs(ratio - 1.0f) > 1e-06) {
     int interp = cv::INTER_LINEAR;
     if (ratio > 1.0) {
       interp = cv::INTER_LINEAR;
@@ -128,7 +128,7 @@ bool YOLOv5Face::Preprocess(
     int resize_w = int(round(static_cast<float>(mat->Width()) * ratio));
     Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
   }
 #endif
   // yolov5face's preprocess steps
   // 1. letterbox
   // 2. BGR->RGB
@@ -149,9 +149,9 @@ bool YOLOv5Face::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -32,10 +32,12 @@ Yolov7FacePreprocessor::Yolov7FacePreprocessor() {
max_wh_ = 7680.0; max_wh_ = 7680.0;
} }
bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, bool Yolov7FacePreprocessor::Run(
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) { if (images->size() == 0) {
FDERROR << "The size of input images should be greater than 0." << std::endl; FDERROR << "The size of input images should be greater than 0."
<< std::endl;
return false; return false;
} }
ims_info->resize(images->size()); ims_info->resize(images->size());
@@ -56,8 +58,9 @@ bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
return true; return true;
} }
bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output, bool Yolov7FacePreprocessor::Preprocess(
std::map<std::string, std::array<float, 2>>* im_info){ FDMat* mat, FDTensor* output,
std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image // Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()), (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
static_cast<float>(mat->Width())}; static_cast<float>(mat->Width())};
@@ -75,13 +78,13 @@ bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output,
static_cast<float>(mat->Width())}; static_cast<float>(mat->Width())};
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }
void Yolov7FacePreprocessor::LetterBox(FDMat* mat) { void Yolov7FacePreprocessor::LetterBox(FDMat* mat) {
float scale = float scale =
std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width());
if (!is_scale_up_) { if (!is_scale_up_) {
scale = std::min(scale, 1.0f); scale = std::min(scale, 1.0f);
} }
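
For context, the letterbox step scales by the smaller ratio and pads the remainder. A standalone sketch with plain OpenCV; the 114 border value and centered padding are common YOLO-style defaults assumed here, not taken from this file:

#include <algorithm>
#include <opencv2/opencv.hpp>

// Scale by the min ratio, then pad to the target size with a constant border.
cv::Mat LetterBox(const cv::Mat& img, int target_w, int target_h) {
  float scale = std::min(target_h / static_cast<float>(img.rows),
                         target_w / static_cast<float>(img.cols));
  cv::Mat resized;
  cv::resize(img, resized,
             cv::Size(static_cast<int>(img.cols * scale),
                      static_cast<int>(img.rows * scale)));
  int pad_w = target_w - resized.cols;
  int pad_h = target_h - resized.rows;
  cv::Mat out;
  cv::copyMakeBorder(resized, out, pad_h / 2, pad_h - pad_h / 2, pad_w / 2,
                     pad_w - pad_w / 2, cv::BORDER_CONSTANT,
                     cv::Scalar(114, 114, 114));
  return out;
}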


@@ -26,8 +26,7 @@ AdaFacePreprocessor::AdaFacePreprocessor() {
permute_ = true; permute_ = true;
} }
bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) { bool AdaFacePreprocessor::Preprocess(FDMat* mat, FDTensor* output) {
// face recognition model's preprocess steps in insightface // face recognition model's preprocess steps in insightface
// reference: insightface/recognition/arcface_torch/inference.py // reference: insightface/recognition/arcface_torch/inference.py
// 1. Resize // 1. Resize
@@ -48,14 +47,15 @@ bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) {
Cast::Run(mat, "float"); Cast::Run(mat, "float");
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }
bool AdaFacePreprocessor::Run(std::vector<FDMat>* images, bool AdaFacePreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs) { std::vector<FDTensor>* outputs) {
if (images->empty()) { if (images->empty()) {
FDERROR << "The size of input images should be greater than 0." << std::endl; FDERROR << "The size of input images should be greater than 0."
<< std::endl;
return false; return false;
} }
FDASSERT(images->size() == 1, "Only support batch = 1 now."); FDASSERT(images->size() == 1, "Only support batch = 1 now.");
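
A hedged usage sketch for the reflowed Run(): pass exactly one image per call, per the FDASSERT. WrapMat and the faceid namespace are assumptions based on FastDeploy's conventions, not confirmed by this diff:

#include <vector>
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

int main() {
  cv::Mat cv_img = cv::imread("face.jpg");  // placeholder path
  std::vector<fastdeploy::vision::FDMat> images;
  images.push_back(fastdeploy::vision::WrapMat(cv_img));  // assumed helper
  std::vector<fastdeploy::FDTensor> outputs;
  fastdeploy::vision::faceid::AdaFacePreprocessor preprocessor;
  // Run() rejects empty input and asserts batch == 1.
  if (!preprocessor.Run(&images, &outputs)) return -1;
  return 0;
}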


@@ -50,7 +50,7 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat* mat,
} }
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }


@@ -22,13 +22,12 @@ namespace vision {
namespace headpose { namespace headpose {
FSANet::FSANet(const std::string& model_file, FSANet::FSANet(const std::string& model_file, const std::string& params_file,
const std::string& params_file,
const RuntimeOption& custom_option, const RuntimeOption& custom_option,
const ModelFormat& model_format) { const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) { if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
valid_gpu_backends = {Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else { } else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -52,7 +51,7 @@ bool FSANet::Initialize() {
} }
bool FSANet::Preprocess(Mat* mat, FDTensor* output, bool FSANet::Preprocess(Mat* mat, FDTensor* output,
std::map<std::string, std::array<int, 2>>* im_info) { std::map<std::string, std::array<int, 2>>* im_info) {
// Resize // Resize
int resize_w = size[0]; int resize_w = size[0];
int resize_h = size[1]; int resize_h = size[1];
@@ -62,7 +61,8 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
// Normalize // Normalize
std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f};
std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, -127.5f / 128.0f}; std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f,
-127.5f / 128.0f};
Convert::Run(mat, alpha, beta); Convert::Run(mat, alpha, beta);
// Record output shape of preprocessed image // Record output shape of preprocessed image
@@ -72,12 +72,13 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
Cast::Run(mat, "float"); Cast::Run(mat, "float");
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true; return true;
} }
bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result, bool FSANet::Postprocess(
const std::map<std::string, std::array<int, 2>>& im_info) { FDTensor& infer_result, HeadPoseResult* result,
const std::map<std::string, std::array<int, 2>>& im_info) {
FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
if (infer_result.dtype != FDDataType::FP32) { if (infer_result.dtype != FDDataType::FP32) {
FDERROR << "Only support post process with float32 data." << std::endl; FDERROR << "Only support post process with float32 data." << std::endl;
@@ -85,8 +86,7 @@ bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result,
} }
auto iter_in = im_info.find("input_shape"); auto iter_in = im_info.find("input_shape");
FDASSERT(iter_in != im_info.end(), FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
"Cannot find input_shape from im_info.");
int in_h = iter_in->second[0]; int in_h = iter_in->second[0];
int in_w = iter_in->second[1]; int in_w = iter_in->second[1];
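
The reflowed alpha/beta above encode per-channel normalization y = alpha * x + beta, i.e. (x - 127.5) / 128 for each channel. A standalone check of the arithmetic:

#include <cstdio>
#include <initializer_list>

int main() {
  const float alpha = 1.0f / 128.0f;
  const float beta = -127.5f / 128.0f;
  for (float x : {0.0f, 127.5f, 255.0f}) {
    // alpha * x + beta is identical to (x - 127.5f) / 128.0f
    std::printf("%5.1f -> %+.5f\n", x, alpha * x + beta);
  }
  return 0;
}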


@@ -77,7 +77,7 @@ bool MODNet::Preprocess(Mat* mat, FDTensor* output,
Cast::Run(mat, "float"); Cast::Run(mat, "float");
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true; return true;
} }
@@ -106,8 +106,8 @@ bool MODNet::Postprocess(
float* alpha_ptr = static_cast<float*>(alpha_tensor.Data()); float* alpha_ptr = static_cast<float*>(alpha_tensor.Data());
// cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr);
// Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy.
Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
alpha_ptr); // ref-only, zero copy. alpha_ptr); // ref-only, zero copy.
if ((out_h != ipt_h) || (out_w != ipt_w)) { if ((out_h != ipt_h) || (out_w != ipt_w)) {
Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1); Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1);
} }
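
Mat::Create(out_h, out_w, 1, FDDataType::FP32, alpha_ptr) wraps the tensor's buffer without copying. The same zero-copy idea sketched with plain OpenCV (sizes invented for illustration):

#include <opencv2/opencv.hpp>
#include <vector>

int main() {
  const int h = 4, w = 4;
  std::vector<float> buf(h * w, 0.5f);
  // Wrap the existing buffer; no pixel data is copied here.
  cv::Mat alpha(h, w, CV_32FC1, buf.data());
  cv::Mat resized;
  cv::resize(alpha, resized, cv::Size(8, 8));  // resize allocates a new buffer
  return 0;
}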

fastdeploy/vision/matting/contrib/rvm.cc Executable file → Normal file

@@ -74,7 +74,7 @@ bool RobustVideoMatting::Preprocess(
(*im_info)["output_shape"] = {mat->Height(), mat->Width()}; (*im_info)["output_shape"] = {mat->Height(), mat->Width()};
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }
@@ -118,16 +118,16 @@ bool RobustVideoMatting::Postprocess(
// for alpha // for alpha
float* alpha_ptr = static_cast<float*>(alpha.Data()); float* alpha_ptr = static_cast<float*>(alpha.Data());
Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
alpha_ptr); // ref-only, zero copy. alpha_ptr); // ref-only, zero copy.
if ((out_h != in_h) || (out_w != in_w)) { if ((out_h != in_h) || (out_w != in_w)) {
Resize::Run(&alpha_resized, in_w, in_h, -1, -1); Resize::Run(&alpha_resized, in_w, in_h, -1, -1);
} }
// for foreground // for foreground
float* fgr_ptr = static_cast<float*>(fgr.Data()); float* fgr_ptr = static_cast<float*>(fgr.Data());
Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
fgr_ptr); // ref-only, zero copy. fgr_ptr); // ref-only, zero copy.
if ((out_h != in_h) || (out_w != in_w)) { if ((out_h != in_h) || (out_w != in_w)) {
Resize::Run(&fgr_resized, in_w, in_h, -1, -1); Resize::Run(&fgr_resized, in_w, in_h, -1, -1);
} }

fastdeploy/vision/utils/sort_det_res.cc Normal file → Executable file

@@ -77,27 +77,42 @@ void SortDetectionResult(DetectionResult* result) {
MergeSort(result, low, high); MergeSort(result, low, high);
} }
bool LexSortByXYCompare(const std::array<float, 4>& box_a, template <typename T>
const std::array<float, 4>& box_b) { bool LexSortByXYCompare(const std::array<T, 4>& box_a,
const std::array<T, 4>& box_b) {
// WARN: The status should be false if (a==b). // WARN: The status should be false if (a==b).
// https://blog.csdn.net/xxxwrq/article/details/83080640 // https://blog.csdn.net/xxxwrq/article/details/83080640
auto is_equal = [](const float& a, const float& b) -> bool { auto is_equal = [](const T& a, const T& b) -> bool {
return std::abs(a - b) < 1e-6f; return std::abs(a - b) < 1e-6f;
}; };
const float& x0_a = box_a[0]; const T& x0_a = box_a[0];
const float& y0_a = box_a[1]; const T& y0_a = box_a[1];
const float& x0_b = box_b[0]; const T& x0_b = box_b[0];
const float& y0_b = box_b[1]; const T& y0_b = box_b[1];
if (is_equal(x0_a, x0_b)) { if (is_equal(x0_a, x0_b)) {
return is_equal(y0_a, y0_b) ? false : y0_a > y0_b; return is_equal(y0_a, y0_b) ? false : y0_a > y0_b;
} }
return x0_a > x0_b; return x0_a > x0_b;
} }
// Specialization for int dtype: exact comparison, no float epsilon needed
template <>
bool LexSortByXYCompare(const std::array<int, 4>& box_a,
const std::array<int, 4>& box_b) {
const int& x0_a = box_a[0];
const int& y0_a = box_a[1];
const int& x0_b = box_b[0];
const int& y0_b = box_b[1];
if (x0_a == x0_b) {
return y0_a == y0_b ? false : y0_a > y0_b;
}
return x0_a > x0_b;
}
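
The specialization compares ints exactly, avoiding the float epsilon of the generic template. A self-contained usage sketch with std::sort; the lambda restates the int specialization's ordering:

#include <algorithm>
#include <array>
#include <vector>

int main() {
  std::vector<std::array<int, 4>> boxes = {
      {10, 20, 30, 40}, {10, 5, 30, 40}, {50, 0, 70, 20}};
  // Same ordering as LexSortByXYCompare<int>: larger x0 first,
  // ties broken by larger y0, equal boxes compare false.
  auto cmp = [](const std::array<int, 4>& a, const std::array<int, 4>& b) {
    if (a[0] == b[0]) return a[1] == b[1] ? false : a[1] > b[1];
    return a[0] > b[0];
  };
  std::sort(boxes.begin(), boxes.end(), cmp);
  return 0;
}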
void ReorderDetectionResultByIndices(DetectionResult* result, void ReorderDetectionResultByIndices(DetectionResult* result,
const std::vector<size_t>& indices) { const std::vector<size_t>& indices) {
// reorder boxes, scores, label_ids, masks // reorder boxes, scores, label_ids, masks
DetectionResult backup = (*result); // move DetectionResult backup = (*result);
const bool contain_masks = backup.contain_masks; const bool contain_masks = backup.contain_masks;
const int boxes_num = backup.boxes.size(); const int boxes_num = backup.boxes.size();
result->Clear(); result->Clear();
@@ -122,7 +137,7 @@ void ReorderDetectionResultByIndices(DetectionResult* result,
} }
void LexSortDetectionResultByXY(DetectionResult* result) { void LexSortDetectionResultByXY(DetectionResult* result) {
if (result->boxes.size() == 0) { if (result->boxes.empty()) {
return; return;
} }
std::vector<size_t> indices; std::vector<size_t> indices;
@@ -138,6 +153,35 @@ void LexSortDetectionResultByXY(DetectionResult* result) {
ReorderDetectionResultByIndices(result, indices); ReorderDetectionResultByIndices(result, indices);
} }
void LexSortOCRDetResultByXY(std::vector<std::array<int, 8>>* result) {
if (result->empty()) {
return;
}
std::vector<size_t> indices;
indices.resize(result->size());
std::vector<std::array<int, 4>> boxes;
boxes.resize(result->size());
for (size_t i = 0; i < result->size(); ++i) {
indices[i] = i;
// Reduce each 4-point box to its first and fourth points for LexSort
boxes[i] = {(*result)[i][0], (*result)[i][1], (*result)[i][6],
(*result)[i][7]};
}
// lex sort by x(w) then y(h)
std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) {
return LexSortByXYCompare(boxes[a], boxes[b]);
});
// reorder boxes
std::vector<std::array<int, 8>> backup = (*result);
const int boxes_num = backup.size();
result->clear();
result->resize(boxes_num);
// boxes
for (int i = 0; i < boxes_num; ++i) {
(*result)[i] = backup[indices[i]];
}
}
} // namespace utils } // namespace utils
} // namespace vision } // namespace vision
} // namespace fastdeploy } // namespace fastdeploy
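
A usage sketch for the new sorter; the quads are invented, each stored as x0,y0 … x3,y3:

#include <array>
#include <vector>
#include "fastdeploy/vision/utils/utils.h"

int main() {
  std::vector<std::array<int, 8>> quads = {
      {10, 10, 60, 10, 60, 30, 10, 30},
      {10, 50, 60, 50, 60, 70, 10, 70}};
  fastdeploy::vision::utils::LexSortOCRDetResultByXY(&quads);
  // quads are now ordered by x0, then y0, per the comparator above.
  return 0;
}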

fastdeploy/vision/utils/utils.h Normal file → Executable file

@@ -67,8 +67,11 @@ void NMS(FaceDetectionResult* result, float iou_threshold = 0.5);
/// Sort DetectionResult/FaceDetectionResult by score /// Sort DetectionResult/FaceDetectionResult by score
FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result); FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result);
FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result); FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result);
/// Lex Sort DetectionResult/FaceDetectionResult by x(w) & y(h) axis /// Lex Sort DetectionResult by x(w) & y(h) axis
FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result); FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result);
/// Lex Sort OCRDet Result by x(w) & y(h) axis
FASTDEPLOY_DECL void LexSortOCRDetResultByXY(
std::vector<std::array<int, 8>>* result);
/// L2 Norm / cosine similarity (for face recognition, ...) /// L2 Norm / cosine similarity (for face recognition, ...)
FASTDEPLOY_DECL std::vector<float> FASTDEPLOY_DECL std::vector<float>