diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt index f839eb228..a6f0b87c2 100755 --- a/benchmark/cpp/CMakeLists.txt +++ b/benchmark/cpp/CMakeLists.txt @@ -13,7 +13,9 @@ add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc) add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc) add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc) add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc) -add_executable(benchmark_ppocr ${PROJECT_SOURCE_DIR}/benchmark_ppocr.cc) +add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc) +add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc) +add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread) @@ -21,12 +23,16 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread) - target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread) else() target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags) - target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags) endif() diff --git a/benchmark/cpp/benchmark_ppocr.cc b/benchmark/cpp/benchmark_ppocr.cc deleted file mode 100755 index e81080c54..000000000 --- a/benchmark/cpp/benchmark_ppocr.cc +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "flags.h" -#include "macros.h" -#include "option.h" - -// Only for ppocr -DEFINE_string(det_model, "", "Path of Detection model of PPOCR."); -DEFINE_string(cls_model, "", "Path of Classification model of PPOCR."); -DEFINE_string(rec_model, "", "Path of Recognization model of PPOCR."); -DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR."); -DEFINE_string(image_rec, "", "Path of Recognization img file of PPOCR."); - -int main(int argc, char* argv[]) { -#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) - // Initialization - auto option = fastdeploy::RuntimeOption(); - if (!CreateRuntimeOption(&option, argc, argv, true)) { - return -1; - } - auto im = cv::imread(FLAGS_image); - auto im_rec = cv::imread(FLAGS_image_rec); - // Detection Model - auto det_model_file = - FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdmodel"; - auto det_params_file = - FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdiparams"; - // Classification Model - auto cls_model_file = - FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdmodel"; - auto cls_params_file = - FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdiparams"; - // Recognition Model - auto rec_model_file = - FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdmodel"; - auto rec_params_file = - FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdiparams"; - auto rec_label_file = FLAGS_rec_label_file; - if (FLAGS_backend == "paddle_trt") { - option.paddle_infer_option.collect_trt_shape = true; - } - auto det_option = option; - auto cls_option = option; - auto rec_option = option; - if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { - det_option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640}, - {1, 3, 960, 960}); - cls_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, - {8, 3, 48, 1024}); - rec_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, - {8, 3, 48, 2304}); - } - auto det_model = fastdeploy::vision::ocr::DBDetector( - det_model_file, det_params_file, det_option); - auto cls_model = fastdeploy::vision::ocr::Classifier( - cls_model_file, cls_params_file, cls_option); - auto rec_model = fastdeploy::vision::ocr::Recognizer( - rec_model_file, rec_params_file, rec_label_file, rec_option); - // Only for runtime - if (FLAGS_profile_mode == "runtime") { - std::vector> boxes_result; - std::cout << "====Detection model====" << std::endl; - BENCHMARK_MODEL(det_model, det_model.Predict(im, &boxes_result)); - int32_t cls_label; - float cls_score; - std::cout << "====Classification model====" << std::endl; - BENCHMARK_MODEL(cls_model, - cls_model.Predict(im_rec, &cls_label, &cls_score)); - std::string text; - float rec_score; - std::cout << "====Recognization model====" << std::endl; - BENCHMARK_MODEL(rec_model, rec_model.Predict(im_rec, &text, &rec_score)); - } - auto model_ppocrv3 = - fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model); - fastdeploy::vision::OCRResult res; - if (FLAGS_profile_mode == "end2end") { - BENCHMARK_MODEL(model_ppocrv3, model_ppocrv3.Predict(im, &res)) - } - auto vis_im = fastdeploy::vision::VisOcr(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; -#endif - return 0; -} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_ppocr_cls.cc b/benchmark/cpp/benchmark_ppocr_cls.cc new file mode 100644 index 000000000..0ddd939bc --- /dev/null +++ b/benchmark/cpp/benchmark_ppocr_cls.cc @@ -0,0 +1,57 @@ +// 
diff --git a/benchmark/cpp/benchmark_ppocr_cls.cc b/benchmark/cpp/benchmark_ppocr_cls.cc new file mode 100644 index 000000000..0ddd939bc --- /dev/null +++ b/benchmark/cpp/benchmark_ppocr_cls.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "flags.h" +#include "macros.h" +#include "option.h" + +int main(int argc, char* argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + // Classification Model + auto cls_model_file = FLAGS_model + sep + "inference.pdmodel"; + auto cls_params_file = FLAGS_model + sep + "inference.pdiparams"; + if (FLAGS_backend == "paddle_trt") { + option.paddle_infer_option.collect_trt_shape = true; + } + if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { + option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, + {8, 3, 48, 1024}); + } + auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier( + cls_model_file, cls_params_file, option); + int32_t res_label; + float res_score; + // Run at least once + model_ppocr_cls.Predict(im, &res_label, &res_score); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + int32_t res_label_expect = 0; + float res_score_expect = 1.0; + // Calculate diff between two results. + auto ppocr_cls_label_diff = res_label - res_label_expect; + auto ppocr_cls_score_diff = res_score - res_score_expect; + std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl; + std::cout << "PPOCR Cls score diff: " << std::abs(ppocr_cls_score_diff) + << std::endl; + BENCHMARK_MODEL(model_ppocr_cls, + model_ppocr_cls.Predict(im, &res_label, &res_score)); +#endif + return 0; +} \ No newline at end of file
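Both TensorRT paths above pin a dynamic-shape profile for the input tensor "x" before the model is built: SetShape takes a minimum, an optimum, and a maximum NCHW shape, and TensorRT optimizes for the middle one. Restated from the classifier code above (the detector below uses its own triple):

```c++
// SetShape(input_name, min_shape, opt_shape, max_shape): TensorRT builds
// its dynamic-shape optimization profile from these three NCHW shapes.
// These are the classifier values from benchmark_ppocr_cls.cc above.
option.trt_option.SetShape("x", {1, 3, 48, 10},   // smallest accepted input
                           {4, 3, 48, 320},       // shape TensorRT tunes for
                           {8, 3, 48, 1024});     // largest accepted input
```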
+ +#include "flags.h" +#include "macros.h" +#include "option.h" + +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + +int main(int argc, char* argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + // Detection Model + auto det_model_file = FLAGS_model + sep + "inference.pdmodel"; + auto det_params_file = FLAGS_model + sep + "inference.pdiparams"; + if (FLAGS_backend == "paddle_trt") { + option.paddle_infer_option.collect_trt_shape = true; + } + if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { + option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640}, + {1, 3, 960, 960}); + } + auto model_ppocr_det = + vision::ocr::DBDetector(det_model_file, det_params_file, option); + std::vector> res; + // Run once at least + model_ppocr_det.Predict(im, &res); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + // Save result to -> disk. + std::string ppocr_det_result_path = "ppocr_det_result.txt"; + benchmark::ResultManager::SaveOCRDetResult(res, ppocr_det_result_path); + // Load result from <- disk. + std::vector> res_loaded; + benchmark::ResultManager::LoadOCRDetResult(&res_loaded, + ppocr_det_result_path); + // Calculate diff between two results. + auto ppocr_det_diff = + benchmark::ResultManager::CalculateDiffStatis(res, res_loaded); + std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean + << ", max=" << ppocr_det_diff.boxes.max + << ", min=" << ppocr_det_diff.boxes.min << std::endl; + BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res)); +#endif + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_ppocr_rec.cc b/benchmark/cpp/benchmark_ppocr_rec.cc new file mode 100644 index 000000000..71bb6b353 --- /dev/null +++ b/benchmark/cpp/benchmark_ppocr_rec.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "flags.h" +#include "macros.h" +#include "option.h" + +DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR."); + +int main(int argc, char* argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + // Recognition Model + auto rec_model_file = FLAGS_model + sep + "inference.pdmodel"; + auto rec_params_file = FLAGS_model + sep + "inference.pdiparams"; + if (FLAGS_backend == "paddle_trt") { + option.paddle_infer_option.collect_trt_shape = true; + } + if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { + option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, + {8, 3, 48, 2304}); + } + auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer( + rec_model_file, rec_params_file, FLAGS_rec_label_file, option); + std::string text; + float rec_score; + // Run once at least + model_ppocr_rec.Predict(im, &text, &rec_score); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + std::string text_expect = "上海斯格威铂尔大酒店"; + float res_score_expect = 0.993308; + // Calculate diff between two results. + auto ppocr_rec_text_diff = text.compare(text_expect); + auto ppocr_rec_score_diff = rec_score - res_score_expect; + std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl; + std::cout << "PPOCR Rec score diff: " << abs(ppocr_rec_score_diff) + << std::endl; + BENCHMARK_MODEL(model_ppocr_rec, + model_ppocr_rec.Predict(im, &text, &rec_score)); +#endif + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index 07c36e31e..848851de9 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -16,6 +16,9 @@ #include "macros.h" #include "option.h" +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + int main(int argc, char* argv[]) { #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) // Initialization @@ -24,11 +27,29 @@ int main(int argc, char* argv[]) { return -1; } auto im = cv::imread(FLAGS_image); - auto model_yolov5 = - fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); - fastdeploy::vision::DetectionResult res; + auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option); + vision::DetectionResult res; + // Run once at least + model_yolov5.Predict(im, &res); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + // Save result to -> disk. + std::string det_result_path = "yolov5_result.txt"; + benchmark::ResultManager::SaveDetectionResult(res, det_result_path); + // Load result from <- disk. + vision::DetectionResult res_loaded; + benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path); + // Calculate diff between two results. 
diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index 07c36e31e..848851de9 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -16,6 +16,9 @@ #include "macros.h" #include "option.h" +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + int main(int argc, char* argv[]) { #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) // Initialization @@ -24,11 +27,29 @@ int main(int argc, char* argv[]) { return -1; } auto im = cv::imread(FLAGS_image); - auto model_yolov5 = - fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); - fastdeploy::vision::DetectionResult res; + auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option); + vision::DetectionResult res; + // Run at least once + model_yolov5.Predict(im, &res); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + // Save result to -> disk. + std::string det_result_path = "yolov5_result.txt"; + benchmark::ResultManager::SaveDetectionResult(res, det_result_path); + // Load result from <- disk. + vision::DetectionResult res_loaded; + benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path); + // Calculate diff between two results. + auto det_diff = + benchmark::ResultManager::CalculateDiffStatis(res, res_loaded); + std::cout << "Boxes diff: mean=" << det_diff.boxes.mean + << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min + << std::endl; + std::cout << "Label_ids diff: mean=" << det_diff.labels.mean + << ", max=" << det_diff.labels.max + << ", min=" << det_diff.labels.min << std::endl; BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res)) - auto vis_im = fastdeploy::vision::VisDetection(im, res); + auto vis_im = vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; #endif diff --git a/benchmark/cpp/run_benchmark_ppyolov8.sh b/benchmark/cpp/run_benchmark_ppyolov8.sh deleted file mode 100644 index e69de29bb..000000000 diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc old mode 100644 new mode 100755 index 5af28e4b1..a66bdb6c0 --- a/fastdeploy/benchmark/utils.cc +++ b/fastdeploy/benchmark/utils.cc @@ -474,6 +474,34 @@ bool ResultManager::SaveSegmentationResult( return true; } +bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res, + const std::string& path) { + if (res.empty()) { + FDERROR << "OCRDetResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // boxes + fs << "boxes" << KEY_VALUE_SEP; + for (int i = 0; i < res.size(); ++i) { + for (int j = 0; j < 8; ++j) { + if ((i == res.size() - 1) && (j == 7)) { + fs << res[i][j]; + } else { + fs << res[i][j] << VALUE_SEP; + } + } + } + fs << "\n"; + fs.close(); + return true; +} + bool ResultManager::LoadDetectionResult(vision::DetectionResult* res, const std::string& path) { if (!CheckFileExists(path)) { @@ -556,6 +584,26 @@ bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res, return true; } +bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res, + const std::string& path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't find file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map<std::string, std::vector<std::string>> data; + // boxes + data = SplitDataLine(lines[0]); + int boxes_num = data.begin()->second.size() / 8; + res->resize(boxes_num); + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + (*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]); + } + } + return true; +} + DetectionDiff ResultManager::CalculateDiffStatis( const vision::DetectionResult& lhs, const vision::DetectionResult& rhs, const float& score_threshold) { @@ -643,6 +691,31 @@ SegmentationDiff ResultManager::CalculateDiffStatis( return diff; } +OCRDetDiff ResultManager::CalculateDiffStatis( + const std::vector<std::array<int, 8>>& lhs, + const std::vector<std::array<int, 8>>& rhs) { + const int boxes_nums = std::min(lhs.size(), rhs.size()); + std::vector<std::array<int, 8>> lhs_sort = lhs; + std::vector<std::array<int, 8>> rhs_sort = rhs; + // lex sort by x(w) & y(h) + vision::utils::LexSortOCRDetResultByXY(&lhs_sort); + vision::utils::LexSortOCRDetResultByXY(&rhs_sort); + // get value diff + const int boxes_num = std::min(lhs_sort.size(), rhs_sort.size()); + std::vector<float> boxes_diff; + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + boxes_diff.push_back(lhs_sort[i][j] - rhs_sort[i][j]); + } + } + + OCRDetDiff diff; + CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(), + &(diff.boxes.mean), &(diff.boxes.max), + &(diff.boxes.min)); + return diff; +} + #endif  // ENABLE_VISION #endif  // ENABLE_BENCHMARK
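Taken together, the helpers above give the benchmarks a simple regression check: persist a known-good result once, then reload and diff it on every run. Note that LoadOCRDetResult parses with std::stoi, so OCR boxes live on disk as integer pixel coordinates. A minimal round-trip sketch against the API exactly as added above (identical inputs should yield zero statistics):

```c++
// Minimal round-trip sketch for the OCR det helpers added above.
#include <array>
#include <vector>
#include "fastdeploy/benchmark/utils.h"

int main() {
  using fastdeploy::benchmark::ResultManager;
  std::vector<std::array<int, 8>> boxes = {
      {10, 20, 110, 20, 110, 60, 10, 60}};  // one 4-point box
  ResultManager::SaveOCRDetResult(boxes, "det_result.txt");
  std::vector<std::array<int, 8>> loaded;
  ResultManager::LoadOCRDetResult(&loaded, "det_result.txt");
  auto diff = ResultManager::CalculateDiffStatis(boxes, loaded);
  // Identical inputs: mean == max == min == 0.
  return diff.boxes.max == 0 ? 0 : 1;
}
```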
diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h index f4d608133..2ad0ae4aa 100755 --- a/fastdeploy/benchmark/utils.h +++ b/fastdeploy/benchmark/utils.h @@ -122,6 +122,10 @@ struct FASTDEPLOY_DECL SegmentationDiff: public BaseDiff { EvalStatis labels; }; +struct FASTDEPLOY_DECL OCRDetDiff: public BaseDiff { + EvalStatis boxes; +}; + #endif  // ENABLE_VISION #endif  // ENABLE_BENCHMARK @@ -148,6 +152,10 @@ struct FASTDEPLOY_DECL ResultManager { const std::string& path); static bool LoadSegmentationResult(vision::SegmentationResult* res, const std::string& path); + static bool SaveOCRDetResult(const std::vector<std::array<int, 8>>& res, + const std::string& path); + static bool LoadOCRDetResult(std::vector<std::array<int, 8>>* res, + const std::string& path); /// Calculate diff value between two basic results. static DetectionDiff CalculateDiffStatis(const vision::DetectionResult& lhs, const vision::DetectionResult& rhs, @@ -157,6 +165,9 @@ struct FASTDEPLOY_DECL ResultManager { static SegmentationDiff CalculateDiffStatis( const vision::SegmentationResult& lhs, const vision::SegmentationResult& rhs); + static OCRDetDiff CalculateDiffStatis( + const std::vector<std::array<int, 8>>& lhs, + const std::vector<std::array<int, 8>>& rhs); #endif  // ENABLE_VISION #endif  // ENABLE_BENCHMARK };
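Since OCRDetDiff carries only box statistics, a consumer typically gates on the extreme values after the lex-sort has aligned the two box lists. A hypothetical CI-style tolerance check (the 1e-2 threshold is arbitrary, chosen here only for illustration):

```c++
// Hypothetical tolerance gate over the OCRDetDiff statistics above.
#include <cmath>
#include "fastdeploy/benchmark/utils.h"

bool CheckOCRDetDiff(const fastdeploy::benchmark::OCRDetDiff& diff,
                     float tol = 1e-2f) {
  // Boxes are lex-sorted before differencing, so max/min are taken over
  // coordinate-wise deltas of aligned boxes.
  return std::fabs(diff.boxes.max) <= tol && std::fabs(diff.boxes.min) <= tol;
}
```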
diff --git a/fastdeploy/vision/classification/contrib/resnet.cc b/fastdeploy/vision/classification/contrib/resnet.cc index 2eed67992..fffbeada6 100644 --- a/fastdeploy/vision/classification/contrib/resnet.cc +++ b/fastdeploy/vision/classification/contrib/resnet.cc @@ -13,23 +13,22 @@ // limitations under the License. #include "fastdeploy/vision/classification/contrib/resnet.h" -#include "fastdeploy/vision/utils/utils.h" #include "fastdeploy/utils/perf.h" +#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { namespace classification { -ResNet::ResNet(const std::string& model_file, - const std::string& params_file, +ResNet::ResNet(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { // In constructor, the 3 steps below are necessary. // 1. set the Backend 2. set RuntimeOption 3. call Initialize() if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; } @@ -42,7 +41,6 @@ ResNet::ResNet(const std::string& model_file, } bool ResNet::Initialize() { - // In this function, the 3 steps below are necessary. // 1. assign values to the global variables 2. call InitRuntime() @@ -57,14 +55,15 @@ bool ResNet::Initialize() { return true; } - bool ResNet::Preprocess(Mat* mat, FDTensor* output) { + // In this function, the preprocess needs to be implemented according to the + // original repos. + // The result of preprocess has to be saved in an FDTensor variable, because + // the input of Infer() needs to be std::vector<FDTensor>. + // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into the + // FDTensor variable. - -// In this function, the preprocess need be implemented according to the original Repos, -// The result of preprocess has to be saved in FDTensor variable, because the input of Infer() need to be std::vector<FDTensor>. -// 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into FDTensor variable. - - if (mat->Height()!=size[0] || mat->Width()!=size[1]){ + if (mat->Height() != size[0] || mat->Width() != size[1]) { int interp = cv::INTER_LINEAR; Resize::Run(mat, size[1], size[0], -1, -1, interp); } @@ -75,20 +74,23 @@ bool ResNet::Preprocess(Mat* mat, FDTensor* output) { HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } -bool ResNet::Postprocess(FDTensor& infer_result, - ClassifyResult* result, int topk) { - - // In this function, the postprocess need be implemented according to the original Repos, - // Finally the reslut of postprocess should be saved in ClassifyResult variable. - // 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult variable. +bool ResNet::Postprocess(FDTensor& infer_result, ClassifyResult* result, + int topk) { + // In this function, the postprocess needs to be implemented according to + // the original repos. + // Finally the result of postprocess should be saved in the ClassifyResult + // variable. + // 1. Softmax 2. Choose topk labels 3. Put the result into the ClassifyResult + // variable. int num_classes = infer_result.shape[1]; function::Softmax(infer_result, &infer_result); - const float* infer_result_buffer = reinterpret_cast<const float*>(infer_result.Data()); + const float* infer_result_buffer = + reinterpret_cast<const float*>(infer_result.Data()); topk = std::min(num_classes, topk); result->label_ids = utils::TopKIndices(infer_result_buffer, num_classes, topk); @@ -100,8 +102,8 @@ bool ResNet::Postprocess(FDTensor& infer_result, } bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) { - - // In this function, the Preprocess(), Infer(), and Postprocess() are called sequentially. + // In this function, the Preprocess(), Infer(), and Postprocess() are called + // sequentially. Mat mat(*im); std::vector<FDTensor> processed_data(1); @@ -128,7 +130,6 @@ bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) { return true; } - } // namespace classification } // namespace vision } // namespace fastdeploy
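The comment corrected throughout this diff ("reshape to n, c, h, w") describes what the code always did: after HWC2CHW the Mat holds CHW data, so prepending a batch dimension of 1 yields the NCHW layout the inference backends expect. The two equivalent forms used by the models in this patch:

```c++
// After HWC2CHW::Run(mat) the tensor data is laid out as {C, H, W}.
// Prepending a batch dimension of 1 produces the {1, C, H, W} (NCHW)
// input shape the runtimes expect; both forms below appear in this diff
// and are equivalent.
mat->ShareWithTensor(output);
output->ExpandDim(0);                              // {C,H,W} -> {1,C,H,W}
// output->shape.insert(output->shape.begin(), 1);  // older equivalent form
```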
diff --git a/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc b/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc index e252ba0ee..35b3e17bb 100644 --- a/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc +++ b/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc @@ -20,18 +20,19 @@ namespace vision { namespace classification { YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() { - size_ = {224, 224}; //{h,w} + size_ = {224, 224};  //{h,w} } -bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv5ClsPreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; // process after image load double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()), - static_cast<float>(mat->Width())); + static_cast<float>(mat->Width())); // yolov5cls's preprocess steps // 1. CenterCrop @@ -54,20 +55,22 @@ bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv5ClsPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv5ClsPreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc b/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc index f4ff11e8f..7b8fcc399 100644 --- a/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc @@ -20,26 +20,27 @@ namespace vision { namespace detection { FastestDetPreprocessor::FastestDetPreprocessor() { - size_ = {352, 352}; //{h,w} + size_ = {352, 352};  //{h,w} } -bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool FastestDetPreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; // process after image load double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()), - static_cast<float>(mat->Width())); + static_cast<float>(mat->Width())); // fastestdet's preprocess steps // 1. resize // 2. convert_and_permute(swap_rb=false) - Resize::Run(mat, size_[0], size_[1]); //resize + Resize::Run(mat, size_[0], size_[1]);  // resize std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector<float> beta = {0.0f, 0.0f, 0.0f}; -//convert to float and HWC2CHW + // convert to float and HWC2CHW ConvertAndPermute::Run(mat, alpha, beta, false); // Record output shape of preprocessed image @@ -47,20 +48,22 @@ bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool FastestDetPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool FastestDetPreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image."
<< std::endl; diff --git a/fastdeploy/vision/detection/contrib/nanodet_plus.cc b/fastdeploy/vision/detection/contrib/nanodet_plus.cc index 2babae49c..0b89cdbe2 100644 --- a/fastdeploy/vision/detection/contrib/nanodet_plus.cc +++ b/fastdeploy/vision/detection/contrib/nanodet_plus.cc @@ -117,8 +117,8 @@ NanoDetPlus::NanoDetPlus(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -182,7 +182,7 @@ bool NanoDetPlus::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/fastdeploy/vision/detection/contrib/scaledyolov4.cc old mode 100755 new mode 100644 index 8678ea181..88c34352b --- a/fastdeploy/vision/detection/contrib/scaledyolov4.cc +++ b/fastdeploy/vision/detection/contrib/scaledyolov4.cc @@ -62,8 +62,8 @@ ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; @@ -144,7 +144,7 @@ bool ScaledYOLOv4::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/yolor.cc b/fastdeploy/vision/detection/contrib/yolor.cc old mode 100755 new mode 100644 index dd4ef728a..cad66eb08 --- a/fastdeploy/vision/detection/contrib/yolor.cc +++ b/fastdeploy/vision/detection/contrib/yolor.cc @@ -61,8 +61,8 @@ YOLOR::YOLOR(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; @@ -142,7 +142,7 @@ bool YOLOR::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc old mode 100755 new mode 100644 index 846e25131..658987b75 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -64,8 +64,9 @@ void 
YOLOv5Preprocessor::LetterBox(FDMat* mat) { } } -bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv5Preprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -82,20 +83,22 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv5Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv5Preprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.cc b/fastdeploy/vision/detection/contrib/yolov5lite.cc index be4116eed..8d8f325dc 100644 --- a/fastdeploy/vision/detection/contrib/yolov5lite.cc +++ b/fastdeploy/vision/detection/contrib/yolov5lite.cc @@ -195,7 +195,7 @@ bool YOLOv5Lite::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -253,7 +253,7 @@ bool YOLOv5Lite::CudaPreprocess( output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, input_tensor_cuda_buffer_device_); output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; #else FDERROR << "CUDA src code was not enabled." << std::endl;
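The reformatted Run() methods in this and the following preprocessors all share one batching pattern: preprocess each image into its own FDTensor, then concatenate along axis 0 into outputs[0]. A condensed sketch of that pattern, assuming fastdeploy::function::Concat concatenates FDTensors along the given axis (the concatenation step itself falls outside the hunks shown here):

```c++
// Condensed sketch of the batching pattern in the preprocessor Run()
// methods. Assumes function::Concat joins FDTensors along axis 0 (batch);
// the per-image Preprocess() step is the one shown in the hunks above.
#include <utility>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"   // fastdeploy::FDTensor
#include "fastdeploy/function/concat.h"  // function::Concat (assumed path)

bool BatchTensors(std::vector<fastdeploy::FDTensor>* tensors,
                  std::vector<fastdeploy::FDTensor>* outputs) {
  outputs->resize(1);
  if (tensors->size() == 1) {
    (*outputs)[0] = std::move((*tensors)[0]);  // single image: no concat
  } else {
    fastdeploy::function::Concat(*tensors, &((*outputs)[0]), 0);
  }
  return true;
}
```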
diff --git a/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc index b880ed337..e5bd82630 100644 --- a/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc @@ -64,8 +64,9 @@ void YOLOv5SegPreprocessor::LetterBox(FDMat* mat) { } } -bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv5SegPreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -82,20 +83,22 @@ bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv5SegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv5SegPreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov6.cc b/fastdeploy/vision/detection/contrib/yolov6.cc index cae9ce3a6..bf3368242 100644 --- a/fastdeploy/vision/detection/contrib/yolov6.cc +++ b/fastdeploy/vision/detection/contrib/yolov6.cc @@ -168,7 +168,7 @@ bool YOLOv6::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -226,7 +226,7 @@ bool YOLOv6::CudaPreprocess( output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, input_tensor_cuda_buffer_device_); output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; #else FDERROR << "CUDA src code was not enabled."
<< std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc old mode 100755 new mode 100644 index 91e22f32b..3374e16bb --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc @@ -64,8 +64,9 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) { } } -bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv7Preprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -82,20 +83,22 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv7Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv7Preprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc old mode 100755 new mode 100644 index daf4ee66b..af7ff0e5c --- a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc @@ -137,7 +137,7 @@ bool YOLOv7End2EndORT::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -235,7 +235,8 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result, return false; } - if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, + conf_threshold)) { FDERROR << "Failed to post process."
<< std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc index 49961df65..e969771a2 100644 --- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc @@ -169,7 +169,7 @@ bool YOLOv7End2EndTRT::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } @@ -227,7 +227,7 @@ bool YOLOv7End2EndTRT::CudaPreprocess( output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, input_tensor_cuda_buffer_device_); output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; #else FDERROR << "CUDA src code was not enabled." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc index 1c6d9f62c..ebb8b28cd 100644 --- a/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc @@ -83,7 +83,7 @@ bool YOLOv8Preprocessor::Preprocess( static_cast(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/yolox.cc b/fastdeploy/vision/detection/contrib/yolox.cc index c1c071826..e7d931c42 100755 --- a/fastdeploy/vision/detection/contrib/yolox.cc +++ b/fastdeploy/vision/detection/contrib/yolox.cc @@ -129,7 +129,7 @@ bool YOLOX::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc b/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc index f7b689575..0b914fb05 100644 --- a/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc +++ b/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc @@ -70,7 +70,7 @@ bool FaceLandmark1000::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facealign/contrib/pfld.cc b/fastdeploy/vision/facealign/contrib/pfld.cc index 5978f10b7..d57427090 100644 --- a/fastdeploy/vision/facealign/contrib/pfld.cc +++ b/fastdeploy/vision/facealign/contrib/pfld.cc @@ -22,13 +22,12 @@ namespace vision { namespace facealign { -PFLD::PFLD(const std::string& model_file, - const std::string& params_file, +PFLD::PFLD(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; 
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -71,12 +70,13 @@ bool PFLD::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } -bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result, - const std::map<std::string, std::array<float, 2>>& im_info) { +bool PFLD::Postprocess( + FDTensor& infer_result, FaceAlignmentResult* result, + const std::map<std::string, std::array<float, 2>>& im_info) { FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); if (infer_result.dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data." << std::endl; @@ -84,8 +84,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result, } auto iter_in = im_info.find("input_shape"); - FDASSERT(iter_in != im_info.end(), - "Cannot find input_shape from im_info."); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); int in_h = iter_in->second[0]; int in_w = iter_in->second[1]; @@ -97,8 +96,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result, x = std::min(std::max(0.f, x), 1.0f); y = std::min(std::max(0.f, y), 1.0f); // decode landmarks (default 106 landmarks) - result->landmarks.emplace_back( - std::array<float, 2>{x * in_w, y * in_h}); + result->landmarks.emplace_back(std::array<float, 2>{x * in_w, y * in_h}); } return true; diff --git a/fastdeploy/vision/facealign/contrib/pipnet.cc b/fastdeploy/vision/facealign/contrib/pipnet.cc index 27ec35c0d..3af16fa91 100644 --- a/fastdeploy/vision/facealign/contrib/pipnet.cc +++ b/fastdeploy/vision/facealign/contrib/pipnet.cc @@ -632,7 +632,7 @@ bool PIPNet::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/retinaface.cc b/fastdeploy/vision/facedet/contrib/retinaface.cc index 6f38f5636..cd5f93ab9 100644 --- a/fastdeploy/vision/facedet/contrib/retinaface.cc +++ b/fastdeploy/vision/facedet/contrib/retinaface.cc @@ -81,8 +81,8 @@ RetinaFace::RetinaFace(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -145,7 +145,7 @@ bool RetinaFace::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/ultraface.cc b/fastdeploy/vision/facedet/contrib/ultraface.cc index e7dd99dc4..cf398b2e4 100644 --- a/fastdeploy/vision/facedet/contrib/ultraface.cc +++ b/fastdeploy/vision/facedet/contrib/ultraface.cc @@ -27,7 +27,7 @@ UltraFace::UltraFace(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { -
valid_cpu_backends = {Backend::ORT}; + valid_cpu_backends = {Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; @@ -90,7 +90,7 @@ bool UltraFace::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/yolov5face.cc b/fastdeploy/vision/facedet/contrib/yolov5face.cc index d508e905a..3fb309bea 100644 --- a/fastdeploy/vision/facedet/contrib/yolov5face.cc +++ b/fastdeploy/vision/facedet/contrib/yolov5face.cc @@ -64,8 +64,8 @@ YOLOv5Face::YOLOv5Face(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -115,11 +115,11 @@ bool YOLOv5Face::Preprocess( // process after image load float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()), size[0] * 1.0f / static_cast<float>(mat->Width())); -#ifndef __ANDROID__ - // Because of the low CPU performance on the Android device, - // we decided to hide this extra resize. It won't make much +#ifndef __ANDROID__ + // Because of the low CPU performance on the Android device, + // we decided to hide this extra resize. It won't make much difference to the final result. - if (std::fabs(ratio - 1.0f) > 1e-06) { + if (std::fabs(ratio - 1.0f) > 1e-06) { int interp = cv::INTER_LINEAR; if (ratio > 1.0) { interp = cv::INTER_LINEAR; @@ -128,7 +128,7 @@ bool YOLOv5Face::Preprocess( int resize_w = int(round(static_cast<float>(mat->Width()) * ratio)); Resize::Run(mat, resize_w, resize_h, -1, -1, interp); } -#endif +#endif // yolov5face's preprocess steps // 1. letterbox // 2. BGR->RGB @@ -149,9 +149,9 @@ bool YOLOv5Face::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); - + mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc b/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc index ad5dd7e33..7af63f585 100644 --- a/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc +++ b/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc @@ -32,10 +32,12 @@ Yolov7FacePreprocessor::Yolov7FacePreprocessor() { max_wh_ = 7680.0; } -bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool Yolov7FacePreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0."
+ << std::endl; return false; } ims_info->resize(images->size()); @@ -56,8 +58,9 @@ bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, -bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info){ +bool Yolov7FacePreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -75,13 +78,13 @@ bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } void Yolov7FacePreprocessor::LetterBox(FDMat* mat) { float scale = - std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); if (!is_scale_up_) { scale = std::min(scale, 1.0f); } diff --git a/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc b/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc old mode 100755 new mode 100644 index 8e8f95950..cb0d90310 --- a/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc +++ b/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc @@ -26,8 +26,7 @@ AdaFacePreprocessor::AdaFacePreprocessor() { permute_ = true; } -bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) { - +bool AdaFacePreprocessor::Preprocess(FDMat* mat, FDTensor* output) { // face recognition model's preprocess steps in insightface // reference: insightface/recognition/arcface_torch/inference.py // 1. Resize @@ -48,14 +47,15 @@ bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) { Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } bool AdaFacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) { if (images->empty()) { - FDERROR << "The size of input images should be greater than 0."
+ << std::endl; return false; } FDASSERT(images->size() == 1, "Only support batch = 1 now."); diff --git a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc index 398a7016e..e7f55cf65 100644 --- a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc +++ b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc @@ -50,7 +50,7 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat* mat, } mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/headpose/contrib/fsanet.cc b/fastdeploy/vision/headpose/contrib/fsanet.cc index 59f25ac5a..c22909134 100644 --- a/fastdeploy/vision/headpose/contrib/fsanet.cc +++ b/fastdeploy/vision/headpose/contrib/fsanet.cc @@ -22,13 +22,12 @@ namespace vision { namespace headpose { -FSANet::FSANet(const std::string& model_file, - const std::string& params_file, +FSANet::FSANet(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -52,7 +51,7 @@ bool FSANet::Initialize() { } bool FSANet::Preprocess(Mat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { + std::map<std::string, std::array<float, 2>>* im_info) { // Resize int resize_w = size[0]; int resize_h = size[1]; @@ -62,7 +61,8 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output, // Normalize std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; - std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, -127.5f / 128.0f}; + std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, + -127.5f / 128.0f}; Convert::Run(mat, alpha, beta); // Record output shape of preprocessed image @@ -72,12 +72,13 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output, Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } -bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result, - const std::map<std::string, std::array<float, 2>>& im_info) { +bool FSANet::Postprocess( + FDTensor& infer_result, HeadPoseResult* result, + const std::map<std::string, std::array<float, 2>>& im_info) { FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); if (infer_result.dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data."
<< std::endl; @@ -85,8 +86,7 @@ bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result, } auto iter_in = im_info.find("input_shape"); - FDASSERT(iter_in != im_info.end(), - "Cannot find input_shape from im_info."); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); int in_h = iter_in->second[0]; int in_w = iter_in->second[1]; diff --git a/fastdeploy/vision/matting/contrib/modnet.cc b/fastdeploy/vision/matting/contrib/modnet.cc index c3a89733d..05141a926 100644 --- a/fastdeploy/vision/matting/contrib/modnet.cc +++ b/fastdeploy/vision/matting/contrib/modnet.cc @@ -77,7 +77,7 @@ bool MODNet::Preprocess(Mat* mat, FDTensor* output, Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -106,8 +106,8 @@ bool MODNet::Postprocess( float* alpha_ptr = static_cast<float*>(alpha_tensor.Data()); // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. - Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, - alpha_ptr); // ref-only, zero copy. + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr);  // ref-only, zero copy. if ((out_h != ipt_h) || (out_w != ipt_w)) { Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1); } diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc old mode 100755 new mode 100644 index 258205cf8..2b16aab83 --- a/fastdeploy/vision/matting/contrib/rvm.cc +++ b/fastdeploy/vision/matting/contrib/rvm.cc @@ -74,7 +74,7 @@ bool RobustVideoMatting::Preprocess( (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } @@ -118,16 +118,16 @@ bool RobustVideoMatting::Postprocess( // for alpha float* alpha_ptr = static_cast<float*>(alpha.Data()); - Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, - alpha_ptr); // ref-only, zero copy. + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr);  // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { Resize::Run(&alpha_resized, in_w, in_h, -1, -1); } // for foreground float* fgr_ptr = static_cast<float*>(fgr.Data()); - Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, - fgr_ptr); // ref-only, zero copy. + Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + fgr_ptr);  // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { Resize::Run(&fgr_resized, in_w, in_h, -1, -1); } diff --git a/fastdeploy/vision/utils/sort_det_res.cc b/fastdeploy/vision/utils/sort_det_res.cc old mode 100644 new mode 100755 index dd33478a3..d0813a260 --- a/fastdeploy/vision/utils/sort_det_res.cc +++ b/fastdeploy/vision/utils/sort_det_res.cc @@ -77,27 +77,42 @@ void SortDetectionResult(DetectionResult* result) { MergeSort(result, low, high); } -bool LexSortByXYCompare(const std::array<float, 4>& box_a, - const std::array<float, 4>& box_b) { +template <typename T> +bool LexSortByXYCompare(const std::array<T, 4>& box_a, + const std::array<T, 4>& box_b) { // WARN: The status should be false if (a==b).
// https://blog.csdn.net/xxxwrq/article/details/83080640 - auto is_equal = [](const float& a, const float& b) -> bool { + auto is_equal = [](const T& a, const T& b) -> bool { return std::abs(a - b) < 1e-6f; }; - const float& x0_a = box_a[0]; - const float& y0_a = box_a[1]; - const float& x0_b = box_b[0]; - const float& y0_b = box_b[1]; + const T& x0_a = box_a[0]; + const T& y0_a = box_a[1]; + const T& x0_b = box_b[0]; + const T& y0_b = box_b[1]; if (is_equal(x0_a, x0_b)) { return is_equal(y0_a, y0_b) ? false : y0_a > y0_b; } return x0_a > x0_b; } +// Only for int dtype +template <> +bool LexSortByXYCompare(const std::array<int, 4>& box_a, + const std::array<int, 4>& box_b) { + const int& x0_a = box_a[0]; + const int& y0_a = box_a[1]; + const int& x0_b = box_b[0]; + const int& y0_b = box_b[1]; + if (x0_a == x0_b) { + return y0_a == y0_b ? false : y0_a > y0_b; + } + return x0_a > x0_b; +} + void ReorderDetectionResultByIndices(DetectionResult* result, const std::vector<size_t>& indices) { // reorder boxes, scores, label_ids, masks - DetectionResult backup = (*result); // move + DetectionResult backup = (*result); const bool contain_masks = backup.contain_masks; const int boxes_num = backup.boxes.size(); result->Clear(); @@ -122,7 +137,7 @@ void ReorderDetectionResultByIndices(DetectionResult* result, } void LexSortDetectionResultByXY(DetectionResult* result) { - if (result->boxes.size() == 0) { + if (result->boxes.empty()) { return; } std::vector<size_t> indices; @@ -138,6 +153,35 @@ void LexSortDetectionResultByXY(DetectionResult* result) { ReorderDetectionResultByIndices(result, indices); } +void LexSortOCRDetResultByXY(std::vector<std::array<int, 8>>* result) { + if (result->empty()) { + return; + } + std::vector<size_t> indices; + indices.resize(result->size()); + std::vector<std::array<int, 4>> boxes; + boxes.resize(result->size()); + for (size_t i = 0; i < result->size(); ++i) { + indices[i] = i; + // 4 points to 2 points for LexSort + boxes[i] = {(*result)[i][0], (*result)[i][1], (*result)[i][6], + (*result)[i][7]}; + } + // lex sort by x(w) then y(h) + std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) { + return LexSortByXYCompare(boxes[a], boxes[b]); + }); + // reorder boxes + std::vector<std::array<int, 8>> backup = (*result); + const int boxes_num = backup.size(); + result->clear(); + result->resize(boxes_num); + // boxes + for (int i = 0; i < boxes_num; ++i) { + (*result)[i] = backup[indices[i]]; + } +} + } // namespace utils } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/utils/utils.h b/fastdeploy/vision/utils/utils.h old mode 100644 new mode 100755 index bca781973..1f8f21b48 --- a/fastdeploy/vision/utils/utils.h +++ b/fastdeploy/vision/utils/utils.h @@ -67,8 +67,11 @@ void NMS(FaceDetectionResult* result, float iou_threshold = 0.5); /// Sort DetectionResult/FaceDetectionResult by score FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result); FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result); -/// Lex Sort DetectionResult/FaceDetectionResult by x(w) & y(h) axis +/// Lex Sort DetectionResult by x(w) & y(h) axis FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result); +/// Lex Sort OCRDet Result by x(w) & y(h) axis +FASTDEPLOY_DECL void LexSortOCRDetResultByXY( + std::vector<std::array<int, 8>>* result); /// L2 Norm / cosine similarity (for face recognition, ...) FASTDEPLOY_DECL std::vector
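For reference, the ordering LexSortByXYCompare imposes can be reproduced standalone: boxes compare by x first, then by y, both descending, and the comparator returns false on ties so it is safe for std::sort. A self-contained example applying the same logic to the reduced 2-point boxes (values hypothetical):

```c++
// Standalone illustration of the lexicographic box ordering used above.
#include <algorithm>
#include <array>
#include <iostream>
#include <vector>

int main() {
  // Reduced boxes {x0, y0, x3, y3}, as built by LexSortOCRDetResultByXY
  // from the 4-point OCR boxes; only x0/y0 drive the comparison.
  std::vector<std::array<int, 4>> boxes = {
      {10, 20, 50, 60}, {10, 5, 40, 30}, {30, 5, 70, 40}};
  std::sort(boxes.begin(), boxes.end(),
            [](const std::array<int, 4>& a, const std::array<int, 4>& b) {
              if (a[0] == b[0]) {
                return a[1] == b[1] ? false : a[1] > b[1];  // then y, desc
              }
              return a[0] > b[0];  // x first, descending
            });
  for (const auto& b : boxes) {
    std::cout << b[0] << "," << b[1] << "\n";  // 30,5  10,20  10,5
  }
  return 0;
}
```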