mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Benchmark]Compare diff for OCR (#1415)
* avoid mem copy for cpp benchmark * set CMAKE_BUILD_TYPE to Release * Add SegmentationDiff * change pointer to reference * fixed bug * cast uint8 to int32 * Add diff compare for OCR * Add diff compare for OCR * rm ppocr pipeline * Add yolov5 diff compare * Add yolov5 diff compare * deal with comments * deal with comments * fixed bug * fixed bug
This commit is contained in:
@@ -13,7 +13,9 @@ add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
|
||||
add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc)
|
||||
add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc)
|
||||
add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc)
|
||||
add_executable(benchmark_ppocr ${PROJECT_SOURCE_DIR}/benchmark_ppocr.cc)
|
||||
add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
|
||||
add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
|
||||
add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)
|
||||
|
||||
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
|
||||
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
@@ -21,12 +23,16 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
|
||||
target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
else()
|
||||
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
|
||||
endif()
|
||||
|
@@ -1,97 +0,0 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
// Only for ppocr
|
||||
DEFINE_string(det_model, "", "Path of Detection model of PPOCR.");
|
||||
DEFINE_string(cls_model, "", "Path of Classification model of PPOCR.");
|
||||
DEFINE_string(rec_model, "", "Path of Recognization model of PPOCR.");
|
||||
DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
|
||||
DEFINE_string(image_rec, "", "Path of Recognization img file of PPOCR.");
|
||||
|
||||
// Benchmark entry point for the PP-OCR models.
//
// With --profile_mode=runtime the detection, classification and recognition
// models are benchmarked one by one; with --profile_mode=end2end the combined
// PPOCRv3 pipeline is benchmarked. Afterwards the pipeline result is drawn on
// the input image and written to ./vis_result.jpg.
//
// Returns 0 on success and -1 if the runtime option cannot be created or a
// required input image cannot be read.
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option, argc, argv, true)) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
  // cv::imread returns an empty Mat on failure; stop early instead of feeding
  // an empty image into the models and the visualizer.
  if (im.empty()) {
    std::cerr << "Failed to read image: " << FLAGS_image << std::endl;
    return -1;
  }
  auto im_rec = cv::imread(FLAGS_image_rec);
  // Detection Model
  auto det_model_file =
      FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdmodel";
  auto det_params_file =
      FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdiparams";
  // Classification Model
  auto cls_model_file =
      FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdmodel";
  auto cls_params_file =
      FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdiparams";
  // Recognition Model
  auto rec_model_file =
      FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdmodel";
  auto rec_params_file =
      FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdiparams";
  auto rec_label_file = FLAGS_rec_label_file;
  if (FLAGS_backend == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
  // Each model gets its own copy of the option so that the TensorRT shape
  // ranges below do not overwrite one another.
  auto det_option = option;
  auto cls_option = option;
  auto rec_option = option;
  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
    det_option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
                                   {1, 3, 960, 960});
    cls_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
                                   {8, 3, 48, 1024});
    rec_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
                                   {8, 3, 48, 2304});
  }
  auto det_model = fastdeploy::vision::ocr::DBDetector(
      det_model_file, det_params_file, det_option);
  auto cls_model = fastdeploy::vision::ocr::Classifier(
      cls_model_file, cls_params_file, cls_option);
  auto rec_model = fastdeploy::vision::ocr::Recognizer(
      rec_model_file, rec_params_file, rec_label_file, rec_option);
  // Only for runtime
  if (FLAGS_profile_mode == "runtime") {
    // The cropped text image is only consumed in this mode, so it is only
    // required here; end2end runs may omit --image_rec.
    if (im_rec.empty()) {
      std::cerr << "Failed to read image: " << FLAGS_image_rec << std::endl;
      return -1;
    }
    std::vector<std::array<int, 8>> boxes_result;
    std::cout << "====Detection model====" << std::endl;
    BENCHMARK_MODEL(det_model, det_model.Predict(im, &boxes_result));
    int32_t cls_label = 0;
    float cls_score = 0.0f;
    std::cout << "====Classification model====" << std::endl;
    BENCHMARK_MODEL(cls_model,
                    cls_model.Predict(im_rec, &cls_label, &cls_score));
    std::string text;
    float rec_score = 0.0f;
    std::cout << "====Recognization model====" << std::endl;
    BENCHMARK_MODEL(rec_model, rec_model.Predict(im_rec, &text, &rec_score));
  }
  auto model_ppocrv3 =
      fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
  fastdeploy::vision::OCRResult res;
  if (FLAGS_profile_mode == "end2end") {
    BENCHMARK_MODEL(model_ppocrv3, model_ppocrv3.Predict(im, &res));
  }
  // NOTE(review): in "runtime" mode `res` is never filled, so the image is
  // written without any OCR annotations - confirm this is intended.
  auto vis_im = fastdeploy::vision::VisOcr(im, res);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
#endif
  return 0;
}
|
57
benchmark/cpp/benchmark_ppocr_cls.cc
Normal file
57
benchmark/cpp/benchmark_ppocr_cls.cc
Normal file
@@ -0,0 +1,57 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
// Benchmark entry point for the PP-OCR text-direction classifier.
//
// Runs the classifier once on --image, prints the difference between the
// prediction and the hard-coded expected label/score, then benchmarks
// repeated predictions through BENCHMARK_MODEL.
//
// Returns 0 on success and -1 if the runtime option cannot be created, the
// image cannot be read, or the first prediction fails.
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option, argc, argv, true)) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
  // cv::imread returns an empty Mat on failure.
  if (im.empty()) {
    std::cerr << "Failed to read image: " << FLAGS_image << std::endl;
    return -1;
  }
  // Classification Model
  auto cls_model_file = FLAGS_model + sep + "inference.pdmodel";
  auto cls_params_file = FLAGS_model + sep + "inference.pdiparams";
  if (FLAGS_backend == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
    option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
                               {8, 3, 48, 1024});
  }
  auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier(
      cls_model_file, cls_params_file, option);
  // Initialized so that a failed prediction can never expose garbage values.
  int32_t res_label = 0;
  float res_score = 0.0f;
  // Run once at least
  if (!model_ppocr_cls.Predict(im, &res_label, &res_score)) {
    std::cerr << "Failed to predict with PPOCR Cls model." << std::endl;
    return -1;
  }
  // 1. Test result diff
  std::cout << "=============== Test result diff =================\n";
  const int32_t res_label_expect = 0;
  const float res_score_expect = 1.0f;
  // Calculate diff between two results.
  const auto ppocr_cls_label_diff = res_label - res_label_expect;
  const float ppocr_cls_score_diff = res_score - res_score_expect;
  // Take the absolute value by hand: an unqualified abs() may bind to the C
  // int overload and truncate the float diff to 0.
  const float ppocr_cls_score_abs_diff = ppocr_cls_score_diff < 0.0f
                                             ? -ppocr_cls_score_diff
                                             : ppocr_cls_score_diff;
  std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl;
  std::cout << "PPOCR Cls score diff: " << ppocr_cls_score_abs_diff
            << std::endl;
  BENCHMARK_MODEL(model_ppocr_cls,
                  model_ppocr_cls.Predict(im, &res_label, &res_score));
#endif
  return 0;
}
|
63
benchmark/cpp/benchmark_ppocr_det.cc
Normal file
63
benchmark/cpp/benchmark_ppocr_det.cc
Normal file
@@ -0,0 +1,63 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
// Benchmark entry point for the PP-OCR text detector (DBDetector).
//
// Runs the detector once on --image, round-trips the detected boxes through
// ppocr_det_result.txt and prints the diff statistics between the in-memory
// and the reloaded boxes, then benchmarks repeated predictions through
// BENCHMARK_MODEL.
//
// Returns 0 on success and -1 if the runtime option cannot be created, the
// image cannot be read, or the first prediction fails.
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option, argc, argv, true)) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
  // cv::imread returns an empty Mat on failure.
  if (im.empty()) {
    std::cerr << "Failed to read image: " << FLAGS_image << std::endl;
    return -1;
  }
  // Detection Model
  auto det_model_file = FLAGS_model + sep + "inference.pdmodel";
  auto det_params_file = FLAGS_model + sep + "inference.pdiparams";
  if (FLAGS_backend == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
    option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
                               {1, 3, 960, 960});
  }
  auto model_ppocr_det =
      vision::ocr::DBDetector(det_model_file, det_params_file, option);
  std::vector<std::array<int, 8>> res;
  // Run once at least
  if (!model_ppocr_det.Predict(im, &res)) {
    std::cerr << "Failed to predict with PPOCR Det model." << std::endl;
    return -1;
  }
  // 1. Test result diff
  std::cout << "=============== Test result diff =================\n";
  // Save result to -> disk.
  const std::string ppocr_det_result_path = "ppocr_det_result.txt";
  const bool saved =
      benchmark::ResultManager::SaveOCRDetResult(res, ppocr_det_result_path);
  // Load result from <- disk. Skipped when saving failed, otherwise a stale
  // file from an earlier run could be compared by mistake.
  std::vector<std::array<int, 8>> res_loaded;
  const bool loaded =
      saved && benchmark::ResultManager::LoadOCRDetResult(
                   &res_loaded, ppocr_det_result_path);
  if (loaded) {
    // Calculate diff between two results.
    auto ppocr_det_diff =
        benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
    std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean
              << ", max=" << ppocr_det_diff.boxes.max
              << ", min=" << ppocr_det_diff.boxes.min << std::endl;
  } else {
    std::cerr << "Skip PPOCR Boxes diff: failed to save or load "
              << ppocr_det_result_path << std::endl;
  }
  BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res));
#endif
  return 0;
}
|
59
benchmark/cpp/benchmark_ppocr_rec.cc
Normal file
59
benchmark/cpp/benchmark_ppocr_rec.cc
Normal file
@@ -0,0 +1,59 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
|
||||
|
||||
// Benchmark entry point for the PP-OCR text recognizer.
//
// Runs the recognizer once on --image, prints the difference between the
// prediction and the hard-coded expected text/score, then benchmarks
// repeated predictions through BENCHMARK_MODEL.
//
// Returns 0 on success and -1 if the runtime option cannot be created, the
// image cannot be read, or the first prediction fails.
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option, argc, argv, true)) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
  // cv::imread returns an empty Mat on failure.
  if (im.empty()) {
    std::cerr << "Failed to read image: " << FLAGS_image << std::endl;
    return -1;
  }
  // Recognition Model
  auto rec_model_file = FLAGS_model + sep + "inference.pdmodel";
  auto rec_params_file = FLAGS_model + sep + "inference.pdiparams";
  if (FLAGS_backend == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
    option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
                               {8, 3, 48, 2304});
  }
  auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer(
      rec_model_file, rec_params_file, FLAGS_rec_label_file, option);
  std::string text;
  // Initialized so that a failed prediction can never expose a garbage value.
  float rec_score = 0.0f;
  // Run once at least
  if (!model_ppocr_rec.Predict(im, &text, &rec_score)) {
    std::cerr << "Failed to predict with PPOCR Rec model." << std::endl;
    return -1;
  }
  // 1. Test result diff
  std::cout << "=============== Test result diff =================\n";
  const std::string text_expect = "上海斯格威铂尔大酒店";
  const float res_score_expect = 0.993308f;
  // Calculate diff between two results.
  // std::string::compare yields 0 when both texts are identical.
  const auto ppocr_rec_text_diff = text.compare(text_expect);
  const float ppocr_rec_score_diff = rec_score - res_score_expect;
  // Take the absolute value by hand: an unqualified abs() may bind to the C
  // int overload and truncate the float diff to 0.
  const float ppocr_rec_score_abs_diff = ppocr_rec_score_diff < 0.0f
                                             ? -ppocr_rec_score_diff
                                             : ppocr_rec_score_diff;
  std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl;
  std::cout << "PPOCR Rec score diff: " << ppocr_rec_score_abs_diff
            << std::endl;
  BENCHMARK_MODEL(model_ppocr_rec,
                  model_ppocr_rec.Predict(im, &text, &rec_score));
#endif
  return 0;
}
|
29
benchmark/cpp/benchmark_yolov5.cc
Executable file → Normal file
29
benchmark/cpp/benchmark_yolov5.cc
Executable file → Normal file
@@ -16,6 +16,9 @@
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Initialization
|
||||
@@ -24,11 +27,29 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
auto model_yolov5 =
|
||||
fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
|
||||
fastdeploy::vision::DetectionResult res;
|
||||
auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option);
|
||||
vision::DetectionResult res;
|
||||
// Run once at least
|
||||
model_yolov5.Predict(im, &res);
|
||||
// 1. Test result diff
|
||||
std::cout << "=============== Test result diff =================\n";
|
||||
// Save result to -> disk.
|
||||
std::string det_result_path = "yolov5_result.txt";
|
||||
benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
|
||||
// Load result from <- disk.
|
||||
vision::DetectionResult res_loaded;
|
||||
benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
|
||||
// Calculate diff between two results.
|
||||
auto det_diff =
|
||||
benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
|
||||
std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
|
||||
<< ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
|
||||
<< std::endl;
|
||||
std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
|
||||
<< ", max=" << det_diff.labels.max
|
||||
<< ", min=" << det_diff.labels.min << std::endl;
|
||||
BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
|
||||
auto vis_im = fastdeploy::vision::VisDetection(im, res);
|
||||
auto vis_im = vision::VisDetection(im, res);
|
||||
cv::imwrite("vis_result.jpg", vis_im);
|
||||
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
|
||||
#endif
|
||||
|
73
fastdeploy/benchmark/utils.cc
Normal file → Executable file
73
fastdeploy/benchmark/utils.cc
Normal file → Executable file
@@ -474,6 +474,34 @@ bool ResultManager::SaveSegmentationResult(
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
|
||||
const std::string& path) {
|
||||
if (res.empty()) {
|
||||
FDERROR << "OCRDetResult can not be empty!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
std::ofstream fs(path, std::ios::out);
|
||||
if (!fs.is_open()) {
|
||||
FDERROR << "Fail to open file:" << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
fs.precision(20);
|
||||
// boxes
|
||||
fs << "boxes" << KEY_VALUE_SEP;
|
||||
for (int i = 0; i < res.size(); ++i) {
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
if ((i == res.size() - 1) && (j == 7)) {
|
||||
fs << res[i][j];
|
||||
} else {
|
||||
fs << res[i][j] << VALUE_SEP;
|
||||
}
|
||||
}
|
||||
}
|
||||
fs << "\n";
|
||||
fs.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
|
||||
const std::string& path) {
|
||||
if (!CheckFileExists(path)) {
|
||||
@@ -556,6 +584,26 @@ bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
|
||||
const std::string& path) {
|
||||
if (!CheckFileExists(path)) {
|
||||
FDERROR << "Can't found file from" << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto lines = ReadLines(path);
|
||||
std::map<std::string, std::vector<std::string>> data;
|
||||
// boxes
|
||||
data = SplitDataLine(lines[0]);
|
||||
int boxes_num = data.begin()->second.size() / 8;
|
||||
res->resize(boxes_num);
|
||||
for (int i = 0; i < boxes_num; ++i) {
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
(*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
DetectionDiff ResultManager::CalculateDiffStatis(
|
||||
const vision::DetectionResult& lhs, const vision::DetectionResult& rhs,
|
||||
const float& score_threshold) {
|
||||
@@ -643,6 +691,31 @@ SegmentationDiff ResultManager::CalculateDiffStatis(
|
||||
return diff;
|
||||
}
|
||||
|
||||
/// Compute diff statistics between two OCR detection results.
///
/// Both inputs are copied and sorted with LexSortOCRDetResultByXY so that
/// boxes are compared in the same order regardless of how they were
/// detected. Boxes are then compared pairwise, coordinate by coordinate
/// (lhs - rhs); if the inputs differ in size only the first
/// min(lhs.size(), rhs.size()) sorted boxes are compared.
///
/// \param[in] lhs First set of boxes.
/// \param[in] rhs Second set of boxes.
/// \return OCRDetDiff whose `boxes` statistics (mean/max/min) are filled by
///         CalculateStatisInfo over the coordinate differences.
OCRDetDiff ResultManager::CalculateDiffStatis(
    const std::vector<std::array<int, 8>>& lhs,
    const std::vector<std::array<int, 8>>& rhs) {
  std::vector<std::array<int, 8>> lhs_sort = lhs;
  std::vector<std::array<int, 8>> rhs_sort = rhs;
  // lex sort by x(w) & y(h)
  vision::utils::LexSortOCRDetResultByXY(&lhs_sort);
  vision::utils::LexSortOCRDetResultByXY(&rhs_sort);
  // get value diff
  const size_t boxes_num = std::min(lhs_sort.size(), rhs_sort.size());
  std::vector<float> boxes_diff;
  boxes_diff.reserve(boxes_num * 8);
  for (size_t i = 0; i < boxes_num; ++i) {
    for (size_t j = 0; j < 8; ++j) {
      boxes_diff.push_back(
          static_cast<float>(lhs_sort[i][j] - rhs_sort[i][j]));
    }
  }

  // NOTE(review): when either input is empty, boxes_diff is empty and
  // CalculateStatisInfo is called with size 0 - confirm it handles that.
  OCRDetDiff diff;
  CalculateStatisInfo<float>(boxes_diff.data(), boxes_diff.size(),
                             &(diff.boxes.mean), &(diff.boxes.max),
                             &(diff.boxes.min));
  return diff;
}
|
||||
|
||||
#endif // ENABLE_VISION
|
||||
#endif // ENABLE_BENCHMARK
|
||||
|
||||
|
@@ -122,6 +122,10 @@ struct FASTDEPLOY_DECL SegmentationDiff: public BaseDiff {
|
||||
EvalStatis labels;
|
||||
};
|
||||
|
||||
/// Diff statistics between two OCR detection results.
struct FASTDEPLOY_DECL OCRDetDiff : public BaseDiff {
  /// Statistics over the box coordinate differences.
  EvalStatis boxes;
};
|
||||
|
||||
#endif // ENABLE_VISION
|
||||
#endif // ENABLE_BENCHMARK
|
||||
|
||||
@@ -148,6 +152,10 @@ struct FASTDEPLOY_DECL ResultManager {
|
||||
const std::string& path);
|
||||
static bool LoadSegmentationResult(vision::SegmentationResult* res,
|
||||
const std::string& path);
|
||||
static bool SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
|
||||
const std::string& path);
|
||||
static bool LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
|
||||
const std::string& path);
|
||||
/// Calculate diff value between two basic results.
|
||||
static DetectionDiff CalculateDiffStatis(const vision::DetectionResult& lhs,
|
||||
const vision::DetectionResult& rhs,
|
||||
@@ -157,6 +165,9 @@ struct FASTDEPLOY_DECL ResultManager {
|
||||
static SegmentationDiff CalculateDiffStatis(
|
||||
const vision::SegmentationResult& lhs,
|
||||
const vision::SegmentationResult& rhs);
|
||||
static OCRDetDiff CalculateDiffStatis(
|
||||
const std::vector<std::array<int, 8>>& lhs,
|
||||
const std::vector<std::array<int, 8>>& rhs);
|
||||
#endif // ENABLE_VISION
|
||||
#endif // ENABLE_BENCHMARK
|
||||
};
|
||||
|
@@ -13,15 +13,14 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/vision/classification/contrib/resnet.h"
|
||||
#include "fastdeploy/vision/utils/utils.h"
|
||||
#include "fastdeploy/utils/perf.h"
|
||||
#include "fastdeploy/vision/utils/utils.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
namespace classification {
|
||||
|
||||
ResNet::ResNet(const std::string& model_file,
|
||||
const std::string& params_file,
|
||||
ResNet::ResNet(const std::string& model_file, const std::string& params_file,
|
||||
const RuntimeOption& custom_option,
|
||||
const ModelFormat& model_format) {
|
||||
// In constructor, the 3 steps below are necessary.
|
||||
@@ -42,7 +41,6 @@ ResNet::ResNet(const std::string& model_file,
|
||||
}
|
||||
|
||||
bool ResNet::Initialize() {
|
||||
|
||||
// In this function, the 3 steps below are necessary.
|
||||
// 1. assign values to the global variables 2. call InitRuntime()
|
||||
|
||||
@@ -57,14 +55,15 @@ bool ResNet::Initialize() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
|
||||
// In this function, the preprocess need be implemented according to the
|
||||
// original Repos,
|
||||
// The result of preprocess has to be saved in FDTensor variable, because the
|
||||
// input of Infer() need to be std::vector<FDTensor>.
|
||||
// 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into
|
||||
// FDTensor variable.
|
||||
|
||||
// In this function, the preprocess need be implemented according to the original Repos,
|
||||
// The result of preprocess has to be saved in FDTensor variable, because the input of Infer() need to be std::vector<FDTensor>.
|
||||
// 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into FDTensor variable.
|
||||
|
||||
if (mat->Height()!=size[0] || mat->Width()!=size[1]){
|
||||
if (mat->Height() != size[0] || mat->Width() != size[1]) {
|
||||
int interp = cv::INTER_LINEAR;
|
||||
Resize::Run(mat, size[1], size[0], -1, -1, interp);
|
||||
}
|
||||
@@ -75,20 +74,23 @@ bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ResNet::Postprocess(FDTensor& infer_result,
|
||||
ClassifyResult* result, int topk) {
|
||||
|
||||
// In this function, the postprocess need be implemented according to the original Repos,
|
||||
// Finally the result of postprocess should be saved in ClassifyResult variable.
|
||||
// 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult variable.
|
||||
bool ResNet::Postprocess(FDTensor& infer_result, ClassifyResult* result,
|
||||
int topk) {
|
||||
// In this function, the postprocess need be implemented according to the
|
||||
// original Repos,
|
||||
// Finally the result of postprocess should be saved in ClassifyResult
|
||||
// variable.
|
||||
// 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult
|
||||
// variable.
|
||||
|
||||
int num_classes = infer_result.shape[1];
|
||||
function::Softmax(infer_result, &infer_result);
|
||||
const float* infer_result_buffer = reinterpret_cast<float*>(infer_result.Data());
|
||||
const float* infer_result_buffer =
|
||||
reinterpret_cast<float*>(infer_result.Data());
|
||||
topk = std::min(num_classes, topk);
|
||||
result->label_ids =
|
||||
utils::TopKIndices(infer_result_buffer, num_classes, topk);
|
||||
@@ -100,8 +102,8 @@ bool ResNet::Postprocess(FDTensor& infer_result,
|
||||
}
|
||||
|
||||
bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
|
||||
|
||||
// In this function, the Preprocess(), Infer(), and Postprocess() are called sequentially.
|
||||
// In this function, the Preprocess(), Infer(), and Postprocess() are called
|
||||
// sequentially.
|
||||
|
||||
Mat mat(*im);
|
||||
std::vector<FDTensor> processed_data(1);
|
||||
@@ -128,7 +130,6 @@ bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // namespace classification
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
||||
|
@@ -23,7 +23,8 @@ YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() {
|
||||
size_ = {224, 224}; //{h,w}
|
||||
}
|
||||
|
||||
bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
bool YOLOv5ClsPreprocessor::Preprocess(
|
||||
FDMat* mat, FDTensor* output,
|
||||
std::map<std::string, std::array<float, 2>>* im_info) {
|
||||
// Record the shape of image and the shape of preprocessed image
|
||||
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
|
||||
@@ -54,14 +55,16 @@ bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
static_cast<float>(mat->Width())};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool YOLOv5ClsPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
bool YOLOv5ClsPreprocessor::Run(
|
||||
std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
|
||||
if (images->size() == 0) {
|
||||
FDERROR << "The size of input images should be greater than 0." << std::endl;
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
ims_info->resize(images->size());
|
||||
|
@@ -23,7 +23,8 @@ FastestDetPreprocessor::FastestDetPreprocessor() {
|
||||
size_ = {352, 352}; //{h,w}
|
||||
}
|
||||
|
||||
bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
bool FastestDetPreprocessor::Preprocess(
|
||||
FDMat* mat, FDTensor* output,
|
||||
std::map<std::string, std::array<float, 2>>* im_info) {
|
||||
// Record the shape of image and the shape of preprocessed image
|
||||
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
|
||||
@@ -36,10 +37,10 @@ bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
// fastestdet's preprocess steps
|
||||
// 1. resize
|
||||
// 2. convert_and_permute(swap_rb=false)
|
||||
Resize::Run(mat, size_[0], size_[1]); //resize
|
||||
Resize::Run(mat, size_[0], size_[1]); // resize
|
||||
std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
|
||||
std::vector<float> beta = {0.0f, 0.0f, 0.0f};
|
||||
//convert to float and HWC2CHW
|
||||
// convert to float and HWC2CHW
|
||||
ConvertAndPermute::Run(mat, alpha, beta, false);
|
||||
|
||||
// Record output shape of preprocessed image
|
||||
@@ -47,14 +48,16 @@ bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
static_cast<float>(mat->Width())};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FastestDetPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
bool FastestDetPreprocessor::Run(
|
||||
std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
|
||||
if (images->size() == 0) {
|
||||
FDERROR << "The size of input images should be greater than 0." << std::endl;
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
ims_info->resize(images->size());
|
||||
|
@@ -182,7 +182,7 @@ bool NanoDetPlus::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
2
fastdeploy/vision/detection/contrib/scaledyolov4.cc
Executable file → Normal file
2
fastdeploy/vision/detection/contrib/scaledyolov4.cc
Executable file → Normal file
@@ -144,7 +144,7 @@ bool ScaledYOLOv4::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
2
fastdeploy/vision/detection/contrib/yolor.cc
Executable file → Normal file
2
fastdeploy/vision/detection/contrib/yolor.cc
Executable file → Normal file
@@ -142,7 +142,7 @@ bool YOLOR::Preprocess(Mat* mat, FDTensor* output,
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
11
fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
Executable file → Normal file
11
fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
Executable file → Normal file
@@ -64,7 +64,8 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) {
|
||||
}
|
||||
}
|
||||
|
||||
bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
bool YOLOv5Preprocessor::Preprocess(
|
||||
FDMat* mat, FDTensor* output,
|
||||
std::map<std::string, std::array<float, 2>>* im_info) {
|
||||
// Record the shape of image and the shape of preprocessed image
|
||||
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
|
||||
@@ -82,14 +83,16 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
static_cast<float>(mat->Width())};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool YOLOv5Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
bool YOLOv5Preprocessor::Run(
|
||||
std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
|
||||
if (images->size() == 0) {
|
||||
FDERROR << "The size of input images should be greater than 0." << std::endl;
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
ims_info->resize(images->size());
|
||||
|
@@ -195,7 +195,7 @@ bool YOLOv5Lite::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -253,7 +253,7 @@ bool YOLOv5Lite::CudaPreprocess(
|
||||
output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
|
||||
input_tensor_cuda_buffer_device_);
|
||||
output->device = Device::GPU;
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
#else
|
||||
FDERROR << "CUDA src code was not enabled." << std::endl;
|
||||
|
@@ -64,7 +64,8 @@ void YOLOv5SegPreprocessor::LetterBox(FDMat* mat) {
|
||||
}
|
||||
}
|
||||
|
||||
bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
bool YOLOv5SegPreprocessor::Preprocess(
|
||||
FDMat* mat, FDTensor* output,
|
||||
std::map<std::string, std::array<float, 2>>* im_info) {
|
||||
// Record the shape of image and the shape of preprocessed image
|
||||
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
|
||||
@@ -82,14 +83,16 @@ bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
static_cast<float>(mat->Width())};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool YOLOv5SegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
bool YOLOv5SegPreprocessor::Run(
|
||||
std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
|
||||
if (images->size() == 0) {
|
||||
FDERROR << "The size of input images should be greater than 0." << std::endl;
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
ims_info->resize(images->size());
|
||||
|
@@ -168,7 +168,7 @@ bool YOLOv6::Preprocess(Mat* mat, FDTensor* output,
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -226,7 +226,7 @@ bool YOLOv6::CudaPreprocess(
|
||||
output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
|
||||
input_tensor_cuda_buffer_device_);
|
||||
output->device = Device::GPU;
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
#else
|
||||
FDERROR << "CUDA src code was not enabled." << std::endl;
|
||||
|
11
fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
Executable file → Normal file
11
fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
Executable file → Normal file
@@ -64,7 +64,8 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) {
|
||||
}
|
||||
}
|
||||
|
||||
bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
bool YOLOv7Preprocessor::Preprocess(
|
||||
FDMat* mat, FDTensor* output,
|
||||
std::map<std::string, std::array<float, 2>>* im_info) {
|
||||
// Record the shape of image and the shape of preprocessed image
|
||||
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
|
||||
@@ -82,14 +83,16 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
static_cast<float>(mat->Width())};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool YOLOv7Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
bool YOLOv7Preprocessor::Run(
|
||||
std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
|
||||
if (images->size() == 0) {
|
||||
FDERROR << "The size of input images should be greater than 0." << std::endl;
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
ims_info->resize(images->size());
|
||||
|
5
fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc
Executable file → Normal file
5
fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc
Executable file → Normal file
@@ -137,7 +137,7 @@ bool YOLOv7End2EndORT::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -235,7 +235,8 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) {
|
||||
if (!Postprocess(reused_output_tensors_[0], result, im_info,
|
||||
conf_threshold)) {
|
||||
FDERROR << "Failed to post process." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
@@ -169,7 +169,7 @@ bool YOLOv7End2EndTRT::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -227,7 +227,7 @@ bool YOLOv7End2EndTRT::CudaPreprocess(
|
||||
output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
|
||||
input_tensor_cuda_buffer_device_);
|
||||
output->device = Device::GPU;
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
#else
|
||||
FDERROR << "CUDA src code was not enabled." << std::endl;
|
||||
|
@@ -83,7 +83,7 @@ bool YOLOv8Preprocessor::Preprocess(
|
||||
static_cast<float>(mat->Width())};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -129,7 +129,7 @@ bool YOLOX::Preprocess(Mat* mat, FDTensor* output,
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -70,7 +70,7 @@ bool FaceLandmark1000::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -22,8 +22,7 @@ namespace vision {
|
||||
|
||||
namespace facealign {
|
||||
|
||||
PFLD::PFLD(const std::string& model_file,
|
||||
const std::string& params_file,
|
||||
PFLD::PFLD(const std::string& model_file, const std::string& params_file,
|
||||
const RuntimeOption& custom_option,
|
||||
const ModelFormat& model_format) {
|
||||
if (model_format == ModelFormat::ONNX) {
|
||||
@@ -71,11 +70,12 @@ bool PFLD::Preprocess(Mat* mat, FDTensor* output,
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
|
||||
bool PFLD::Postprocess(
|
||||
FDTensor& infer_result, FaceAlignmentResult* result,
|
||||
const std::map<std::string, std::array<int, 2>>& im_info) {
|
||||
FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
|
||||
if (infer_result.dtype != FDDataType::FP32) {
|
||||
@@ -84,8 +84,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
|
||||
}
|
||||
|
||||
auto iter_in = im_info.find("input_shape");
|
||||
FDASSERT(iter_in != im_info.end(),
|
||||
"Cannot find input_shape from im_info.");
|
||||
FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
|
||||
int in_h = iter_in->second[0];
|
||||
int in_w = iter_in->second[1];
|
||||
|
||||
@@ -97,8 +96,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
|
||||
x = std::min(std::max(0.f, x), 1.0f);
|
||||
y = std::min(std::max(0.f, y), 1.0f);
|
||||
// decode landmarks (default 106 landmarks)
|
||||
result->landmarks.emplace_back(
|
||||
std::array<float, 2>{x * in_w, y * in_h});
|
||||
result->landmarks.emplace_back(std::array<float, 2>{x * in_w, y * in_h});
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@@ -632,7 +632,7 @@ bool PIPNet::Preprocess(Mat* mat, FDTensor* output,
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -145,7 +145,7 @@ bool RetinaFace::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -90,7 +90,7 @@ bool UltraFace::Preprocess(
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -151,7 +151,7 @@ bool YOLOv5Face::Preprocess(
|
||||
Cast::Run(mat, "float");
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -32,10 +32,12 @@ Yolov7FacePreprocessor::Yolov7FacePreprocessor() {
|
||||
max_wh_ = 7680.0;
|
||||
}
|
||||
|
||||
bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
bool Yolov7FacePreprocessor::Run(
|
||||
std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
|
||||
if (images->size() == 0) {
|
||||
FDERROR << "The size of input images should be greater than 0." << std::endl;
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
ims_info->resize(images->size());
|
||||
@@ -56,8 +58,9 @@ bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
std::map<std::string, std::array<float, 2>>* im_info){
|
||||
bool Yolov7FacePreprocessor::Preprocess(
|
||||
FDMat* mat, FDTensor* output,
|
||||
std::map<std::string, std::array<float, 2>>* im_info) {
|
||||
// Record the shape of image and the shape of preprocessed image
|
||||
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
|
||||
static_cast<float>(mat->Width())};
|
||||
@@ -75,7 +78,7 @@ bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output,
|
||||
static_cast<float>(mat->Width())};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
8
fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc
Executable file → Normal file
8
fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc
Executable file → Normal file
@@ -26,8 +26,7 @@ AdaFacePreprocessor::AdaFacePreprocessor() {
|
||||
permute_ = true;
|
||||
}
|
||||
|
||||
bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) {
|
||||
|
||||
bool AdaFacePreprocessor::Preprocess(FDMat* mat, FDTensor* output) {
|
||||
// face recognition model's preprocess steps in insightface
|
||||
// reference: insightface/recognition/arcface_torch/inference.py
|
||||
// 1. Resize
|
||||
@@ -48,14 +47,15 @@ bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) {
|
||||
Cast::Run(mat, "float");
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AdaFacePreprocessor::Run(std::vector<FDMat>* images,
|
||||
std::vector<FDTensor>* outputs) {
|
||||
if (images->empty()) {
|
||||
FDERROR << "The size of input images should be greater than 0." << std::endl;
|
||||
FDERROR << "The size of input images should be greater than 0."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
FDASSERT(images->size() == 1, "Only support batch = 1 now.");
|
||||
|
@@ -50,7 +50,7 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat* mat,
|
||||
}
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -22,8 +22,7 @@ namespace vision {
|
||||
|
||||
namespace headpose {
|
||||
|
||||
FSANet::FSANet(const std::string& model_file,
|
||||
const std::string& params_file,
|
||||
FSANet::FSANet(const std::string& model_file, const std::string& params_file,
|
||||
const RuntimeOption& custom_option,
|
||||
const ModelFormat& model_format) {
|
||||
if (model_format == ModelFormat::ONNX) {
|
||||
@@ -62,7 +61,8 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
|
||||
|
||||
// Normalize
|
||||
std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f};
|
||||
std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, -127.5f / 128.0f};
|
||||
std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f,
|
||||
-127.5f / 128.0f};
|
||||
Convert::Run(mat, alpha, beta);
|
||||
|
||||
// Record output shape of preprocessed image
|
||||
@@ -72,11 +72,12 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
|
||||
Cast::Run(mat, "float");
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result,
|
||||
bool FSANet::Postprocess(
|
||||
FDTensor& infer_result, HeadPoseResult* result,
|
||||
const std::map<std::string, std::array<int, 2>>& im_info) {
|
||||
FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
|
||||
if (infer_result.dtype != FDDataType::FP32) {
|
||||
@@ -85,8 +86,7 @@ bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result,
|
||||
}
|
||||
|
||||
auto iter_in = im_info.find("input_shape");
|
||||
FDASSERT(iter_in != im_info.end(),
|
||||
"Cannot find input_shape from im_info.");
|
||||
FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
|
||||
int in_h = iter_in->second[0];
|
||||
int in_w = iter_in->second[1];
|
||||
|
||||
|
@@ -77,7 +77,7 @@ bool MODNet::Preprocess(Mat* mat, FDTensor* output,
|
||||
Cast::Run(mat, "float");
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
|
||||
output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
2
fastdeploy/vision/matting/contrib/rvm.cc
Executable file → Normal file
2
fastdeploy/vision/matting/contrib/rvm.cc
Executable file → Normal file
@@ -74,7 +74,7 @@ bool RobustVideoMatting::Preprocess(
|
||||
(*im_info)["output_shape"] = {mat->Height(), mat->Width()};
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->ExpandDim(0); // reshape to n, h, w, c
|
||||
output->ExpandDim(0); // reshape to n, c, h, w
|
||||
return true;
|
||||
}
|
||||
|
||||
|
62
fastdeploy/vision/utils/sort_det_res.cc
Normal file → Executable file
62
fastdeploy/vision/utils/sort_det_res.cc
Normal file → Executable file
@@ -77,27 +77,42 @@ void SortDetectionResult(DetectionResult* result) {
|
||||
MergeSort(result, low, high);
|
||||
}
|
||||
|
||||
bool LexSortByXYCompare(const std::array<float, 4>& box_a,
|
||||
const std::array<float, 4>& box_b) {
|
||||
template <typename T>
|
||||
bool LexSortByXYCompare(const std::array<T, 4>& box_a,
|
||||
const std::array<T, 4>& box_b) {
|
||||
// WARN: The status should be false if (a==b).
|
||||
// https://blog.csdn.net/xxxwrq/article/details/83080640
|
||||
auto is_equal = [](const float& a, const float& b) -> bool {
|
||||
auto is_equal = [](const T& a, const T& b) -> bool {
|
||||
return std::abs(a - b) < 1e-6f;
|
||||
};
|
||||
const float& x0_a = box_a[0];
|
||||
const float& y0_a = box_a[1];
|
||||
const float& x0_b = box_b[0];
|
||||
const float& y0_b = box_b[1];
|
||||
const T& x0_a = box_a[0];
|
||||
const T& y0_a = box_a[1];
|
||||
const T& x0_b = box_b[0];
|
||||
const T& y0_b = box_b[1];
|
||||
if (is_equal(x0_a, x0_b)) {
|
||||
return is_equal(y0_a, y0_b) ? false : y0_a > y0_b;
|
||||
}
|
||||
return x0_a > x0_b;
|
||||
}
|
||||
|
||||
// Only for int dtype
|
||||
template <>
|
||||
bool LexSortByXYCompare(const std::array<int, 4>& box_a,
|
||||
const std::array<int, 4>& box_b) {
|
||||
const int& x0_a = box_a[0];
|
||||
const int& y0_a = box_a[1];
|
||||
const int& x0_b = box_b[0];
|
||||
const int& y0_b = box_b[1];
|
||||
if (x0_a == x0_b) {
|
||||
return y0_a == y0_b ? false : y0_a > y0_b;
|
||||
}
|
||||
return x0_a > x0_b;
|
||||
}
|
||||
|
||||
void ReorderDetectionResultByIndices(DetectionResult* result,
|
||||
const std::vector<size_t>& indices) {
|
||||
// reorder boxes, scores, label_ids, masks
|
||||
DetectionResult backup = (*result); // move
|
||||
DetectionResult backup = (*result);
|
||||
const bool contain_masks = backup.contain_masks;
|
||||
const int boxes_num = backup.boxes.size();
|
||||
result->Clear();
|
||||
@@ -122,7 +137,7 @@ void ReorderDetectionResultByIndices(DetectionResult* result,
|
||||
}
|
||||
|
||||
void LexSortDetectionResultByXY(DetectionResult* result) {
|
||||
if (result->boxes.size() == 0) {
|
||||
if (result->boxes.empty()) {
|
||||
return;
|
||||
}
|
||||
std::vector<size_t> indices;
|
||||
@@ -138,6 +153,35 @@ void LexSortDetectionResultByXY(DetectionResult* result) {
|
||||
ReorderDetectionResultByIndices(result, indices);
|
||||
}
|
||||
|
||||
void LexSortOCRDetResultByXY(std::vector<std::array<int, 8>>* result) {
|
||||
if (result->empty()) {
|
||||
return;
|
||||
}
|
||||
std::vector<size_t> indices;
|
||||
indices.resize(result->size());
|
||||
std::vector<std::array<int, 4>> boxes;
|
||||
boxes.resize(result->size());
|
||||
for (size_t i = 0; i < result->size(); ++i) {
|
||||
indices[i] = i;
|
||||
// 4 points to 2 points for LexSort
|
||||
boxes[i] = {(*result)[i][0], (*result)[i][1], (*result)[i][6],
|
||||
(*result)[i][7]};
|
||||
}
|
||||
// lex sort by x(w) then y(h)
|
||||
std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) {
|
||||
return LexSortByXYCompare(boxes[a], boxes[b]);
|
||||
});
|
||||
// reorder boxes
|
||||
std::vector<std::array<int, 8>> backup = (*result);
|
||||
const int boxes_num = backup.size();
|
||||
result->clear();
|
||||
result->resize(boxes_num);
|
||||
// boxes
|
||||
for (int i = 0; i < boxes_num; ++i) {
|
||||
(*result)[i] = backup[indices[i]];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace utils
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
||||
|
5
fastdeploy/vision/utils/utils.h
Normal file → Executable file
5
fastdeploy/vision/utils/utils.h
Normal file → Executable file
@@ -67,8 +67,11 @@ void NMS(FaceDetectionResult* result, float iou_threshold = 0.5);
|
||||
/// Sort DetectionResult/FaceDetectionResult by score
|
||||
FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result);
|
||||
FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result);
|
||||
/// Lex Sort DetectionResult/FaceDetectionResult by x(w) & y(h) axis
|
||||
/// Lex Sort DetectionResult by x(w) & y(h) axis
|
||||
FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result);
|
||||
/// Lex Sort OCRDet Result by x(w) & y(h) axis
|
||||
FASTDEPLOY_DECL void LexSortOCRDetResultByXY(
|
||||
std::vector<std::array<int, 8>>* result);
|
||||
|
||||
/// L2 Norm / cosine similarity (for face recognition, ...)
|
||||
FASTDEPLOY_DECL std::vector<float>
|
||||
|
Reference in New Issue
Block a user