[Benchmark] Compare diff for OCR (#1415)

* avoid mem copy for cpp benchmark

* set CMAKE_BUILD_TYPE to Release

* Add SegmentationDiff

* change pointer to reference

* fixed bug

* cast uint8 to int32

* Add diff compare for OCR

* Add diff compare for OCR

* rm ppocr pipeline

* Add yolov5 diff compare

* Add yolov5 diff compare

* deal with comments

* deal with comments

* fixed bug

* fixed bug
Commit d3845eb4e1 (parent 0c664fd006), authored by WJJ1995 on 2023-02-23 18:57:39 +08:00, committed by GitHub.
38 changed files with 513 additions and 255 deletions.


@@ -13,7 +13,9 @@ add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
 add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc)
 add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc)
 add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc)
-add_executable(benchmark_ppocr ${PROJECT_SOURCE_DIR}/benchmark_ppocr.cc)
+add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
+add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
+add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)
 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
@@ -21,12 +23,16 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread)
-  target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags)
-  target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
 endif()


@@ -1,97 +0,0 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
// Only for ppocr
DEFINE_string(det_model, "", "Path of Detection model of PPOCR.");
DEFINE_string(cls_model, "", "Path of Classification model of PPOCR.");
DEFINE_string(rec_model, "", "Path of Recognization model of PPOCR.");
DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
DEFINE_string(image_rec, "", "Path of Recognization img file of PPOCR.");
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
auto im_rec = cv::imread(FLAGS_image_rec);
// Detection Model
auto det_model_file =
FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdmodel";
auto det_params_file =
FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdiparams";
// Classification Model
auto cls_model_file =
FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdmodel";
auto cls_params_file =
FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdiparams";
// Recognition Model
auto rec_model_file =
FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdmodel";
auto rec_params_file =
FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdiparams";
auto rec_label_file = FLAGS_rec_label_file;
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
auto det_option = option;
auto cls_option = option;
auto rec_option = option;
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
det_option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
{1, 3, 960, 960});
cls_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 1024});
rec_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 2304});
}
auto det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
auto cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
auto rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label_file, rec_option);
// Only for runtime
if (FLAGS_profile_mode == "runtime") {
std::vector<std::array<int, 8>> boxes_result;
std::cout << "====Detection model====" << std::endl;
BENCHMARK_MODEL(det_model, det_model.Predict(im, &boxes_result));
int32_t cls_label;
float cls_score;
std::cout << "====Classification model====" << std::endl;
BENCHMARK_MODEL(cls_model,
cls_model.Predict(im_rec, &cls_label, &cls_score));
std::string text;
float rec_score;
std::cout << "====Recognization model====" << std::endl;
BENCHMARK_MODEL(rec_model, rec_model.Predict(im_rec, &text, &rec_score));
}
auto model_ppocrv3 =
fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
fastdeploy::vision::OCRResult res;
if (FLAGS_profile_mode == "end2end") {
BENCHMARK_MODEL(model_ppocrv3, model_ppocrv3.Predict(im, &res))
}
auto vis_im = fastdeploy::vision::VisOcr(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
#endif
return 0;
}


@@ -0,0 +1,57 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
// Classification Model
auto cls_model_file = FLAGS_model + sep + "inference.pdmodel";
auto cls_params_file = FLAGS_model + sep + "inference.pdiparams";
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 1024});
}
auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, option);
int32_t res_label;
float res_score;
// Run once at least
model_ppocr_cls.Predict(im, &res_label, &res_score);
// 1. Test result diff
std::cout << "=============== Test result diff =================\n";
int32_t res_label_expect = 0;
float res_score_expect = 1.0;
// Calculate diff between two results.
auto ppocr_cls_label_diff = res_label - res_label_expect;
auto ppocr_cls_score_diff = res_score - res_score_expect;
std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl;
std::cout << "PPOCR Cls score diff: " << abs(ppocr_cls_score_diff)
<< std::endl;
BENCHMARK_MODEL(model_ppocr_cls,
model_ppocr_cls.Predict(im, &res_label, &res_score));
#endif
return 0;
}
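The cls benchmark above compares scalar outputs against hard-coded expectations (label 0, score 1.0) and simply prints the raw differences. A minimal standalone sketch of the same idea with an explicit pass/fail tolerance; CheckClsResult is a hypothetical helper, not FastDeploy API:

#include <cmath>
#include <cstdint>
#include <iostream>

// Hypothetical helper (not part of this commit): compare a predicted
// label/score pair against expected values, with a tolerance on the score.
static bool CheckClsResult(int32_t label, float score, int32_t expect_label,
                           float expect_score, float tol = 1e-4f) {
  if (label != expect_label) return false;
  return std::fabs(score - expect_score) <= tol;
}

int main() {
  // Expected values mirror the ones hard-coded in benchmark_ppocr_cls.cc.
  bool ok = CheckClsResult(/*label=*/0, /*score=*/0.99998f, 0, 1.0f, 1e-3f);
  std::cout << (ok ? "PASS" : "FAIL") << std::endl;
}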


@@ -0,0 +1,63 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
namespace vision = fastdeploy::vision;
namespace benchmark = fastdeploy::benchmark;
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
// Detection Model
auto det_model_file = FLAGS_model + sep + "inference.pdmodel";
auto det_params_file = FLAGS_model + sep + "inference.pdiparams";
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
{1, 3, 960, 960});
}
auto model_ppocr_det =
vision::ocr::DBDetector(det_model_file, det_params_file, option);
std::vector<std::array<int, 8>> res;
// Run once at least
model_ppocr_det.Predict(im, &res);
// 1. Test result diff
std::cout << "=============== Test result diff =================\n";
// Save result to -> disk.
std::string ppocr_det_result_path = "ppocr_det_result.txt";
benchmark::ResultManager::SaveOCRDetResult(res, ppocr_det_result_path);
// Load result from <- disk.
std::vector<std::array<int, 8>> res_loaded;
benchmark::ResultManager::LoadOCRDetResult(&res_loaded,
ppocr_det_result_path);
// Calculate diff between two results.
auto ppocr_det_diff =
benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean
<< ", max=" << ppocr_det_diff.boxes.max
<< ", min=" << ppocr_det_diff.boxes.min << std::endl;
BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res));
#endif
return 0;
}
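The det benchmark persists boxes through ResultManager::SaveOCRDetResult/LoadOCRDetResult (see the fastdeploy/benchmark/utils.cc hunks further down) and diffs the result against the reloaded copy. A self-contained sketch of that round trip, assuming a single "boxes:" line with comma-separated coordinates; the helpers below are stand-ins, and ':' / ',' are assumed values of FastDeploy's KEY_VALUE_SEP / VALUE_SEP:

#include <array>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using Boxes = std::vector<std::array<int, 8>>;

// Stand-in for ResultManager::SaveOCRDetResult: one "boxes:" line,
// 8 ints per quadrilateral, comma-separated, no trailing comma.
static void SaveBoxes(const Boxes& boxes, const std::string& path) {
  std::ofstream fs(path);
  fs << "boxes:";
  for (size_t i = 0; i < boxes.size(); ++i)
    for (int j = 0; j < 8; ++j)
      fs << boxes[i][j] << ((i + 1 == boxes.size() && j == 7) ? "" : ",");
  fs << "\n";
}

// Stand-in for ResultManager::LoadOCRDetResult: parse the line back.
static Boxes LoadBoxes(const std::string& path) {
  std::ifstream fs(path);
  std::string line;
  std::getline(fs, line);
  std::stringstream ss(line.substr(line.find(':') + 1));
  std::vector<int> vals;
  for (std::string tok; std::getline(ss, tok, ',');)
    vals.push_back(std::stoi(tok));
  Boxes boxes(vals.size() / 8);
  for (size_t i = 0; i < boxes.size(); ++i)
    for (int j = 0; j < 8; ++j) boxes[i][j] = vals[i * 8 + j];
  return boxes;
}

int main() {
  Boxes res = {{10, 20, 110, 20, 110, 60, 10, 60}};
  SaveBoxes(res, "ppocr_det_result.txt");
  Boxes loaded = LoadBoxes("ppocr_det_result.txt");
  std::cout << "round trip ok: " << (res == loaded) << std::endl;
}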


@@ -0,0 +1,59 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
// Recognition Model
auto rec_model_file = FLAGS_model + sep + "inference.pdmodel";
auto rec_params_file = FLAGS_model + sep + "inference.pdiparams";
if (FLAGS_backend == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
{8, 3, 48, 2304});
}
auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, FLAGS_rec_label_file, option);
std::string text;
float rec_score;
// Run once at least
model_ppocr_rec.Predict(im, &text, &rec_score);
// 1. Test result diff
std::cout << "=============== Test result diff =================\n";
std::string text_expect = "上海斯格威铂尔大酒店";
float res_score_expect = 0.993308;
// Calculate diff between two results.
auto ppocr_rec_text_diff = text.compare(text_expect);
auto ppocr_rec_score_diff = rec_score - res_score_expect;
std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl;
std::cout << "PPOCR Rec score diff: " << abs(ppocr_rec_score_diff)
<< std::endl;
BENCHMARK_MODEL(model_ppocr_rec,
model_ppocr_rec.Predict(im, &text, &rec_score));
#endif
return 0;
}
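One caveat on the rec benchmark above: std::string::compare returns 0 only on an exact byte-for-byte match, and any nonzero value just encodes lexicographic ordering, so the printed "text diff" is a match/mismatch flag rather than an edit distance. A tiny illustration:

#include <iostream>
#include <string>

int main() {
  std::string got = "上海斯格威铂尔大酒店";
  std::string expect = "上海斯格威铂尔大酒店";
  // 0 means the UTF-8 byte sequences are identical; a nonzero value only
  // tells you which string sorts first, not how different they are.
  std::cout << "PPOCR Rec text diff: " << got.compare(expect) << std::endl;
}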

benchmark/cpp/benchmark_yolov5.cc (29 changes, Executable file → Normal file)

@@ -16,6 +16,9 @@
 #include "macros.h"
 #include "option.h"
+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
 int main(int argc, char* argv[]) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
   // Initialization
@@ -24,11 +27,29 @@ int main(int argc, char* argv[]) {
     return -1;
   }
   auto im = cv::imread(FLAGS_image);
-  auto model_yolov5 =
-      fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
-  fastdeploy::vision::DetectionResult res;
+  auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option);
+  vision::DetectionResult res;
+  // Run once at least
+  model_yolov5.Predict(im, &res);
+  // 1. Test result diff
+  std::cout << "=============== Test result diff =================\n";
+  // Save result to -> disk.
+  std::string det_result_path = "yolov5_result.txt";
+  benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
+  // Load result from <- disk.
+  vision::DetectionResult res_loaded;
+  benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
+  // Calculate diff between two results.
+  auto det_diff =
+      benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+  std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
+            << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
+            << std::endl;
+  std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
+            << ", max=" << det_diff.labels.max
+            << ", min=" << det_diff.labels.min << std::endl;
   BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
-  auto vis_im = fastdeploy::vision::VisDetection(im, res);
+  auto vis_im = vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 #endif
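The mean/max/min figures printed above come from CalculateStatisInfo<float> in fastdeploy/benchmark/utils.cc (shown below). A sketch of that reduction, under the assumption that it is a plain mean/max/min over the elementwise differences; the real helper takes a raw pointer, a count, and three out-parameters instead:

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

struct EvalStatisSketch { float mean, max, min; };

// Assumed behavior of CalculateStatisInfo<float>: simple mean/max/min.
static EvalStatisSketch ComputeStatis(const std::vector<float>& diff) {
  EvalStatisSketch s{0.f, 0.f, 0.f};
  if (diff.empty()) return s;
  s.mean = std::accumulate(diff.begin(), diff.end(), 0.f) / diff.size();
  s.max = *std::max_element(diff.begin(), diff.end());
  s.min = *std::min_element(diff.begin(), diff.end());
  return s;
}

int main() {
  auto s = ComputeStatis({0.1f, -0.2f, 0.05f});
  std::cout << "mean=" << s.mean << ", max=" << s.max << ", min=" << s.min
            << std::endl;
}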

fastdeploy/benchmark/utils.cc (73 changes, Normal file → Executable file)

@@ -474,6 +474,34 @@ bool ResultManager::SaveSegmentationResult(
   return true;
 }
+bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
+                                     const std::string& path) {
+  if (res.empty()) {
+    FDERROR << "OCRDetResult can not be empty!" << std::endl;
+    return false;
+  }
+  std::ofstream fs(path, std::ios::out);
+  if (!fs.is_open()) {
+    FDERROR << "Fail to open file:" << path << std::endl;
+    return false;
+  }
+  fs.precision(20);
+  // boxes
+  fs << "boxes" << KEY_VALUE_SEP;
+  for (int i = 0; i < res.size(); ++i) {
+    for (int j = 0; j < 8; ++j) {
+      if ((i == res.size() - 1) && (j == 7)) {
+        fs << res[i][j];
+      } else {
+        fs << res[i][j] << VALUE_SEP;
+      }
+    }
+  }
+  fs << "\n";
+  fs.close();
+  return true;
+}
 bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
                                         const std::string& path) {
   if (!CheckFileExists(path)) {
@@ -556,6 +584,26 @@ bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
   return true;
 }
+bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
+                                     const std::string& path) {
+  if (!CheckFileExists(path)) {
+    FDERROR << "Can't found file from" << path << std::endl;
+    return false;
+  }
+  auto lines = ReadLines(path);
+  std::map<std::string, std::vector<std::string>> data;
+  // boxes
+  data = SplitDataLine(lines[0]);
+  int boxes_num = data.begin()->second.size() / 8;
+  res->resize(boxes_num);
+  for (int i = 0; i < boxes_num; ++i) {
+    for (int j = 0; j < 8; ++j) {
+      (*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]);
+    }
+  }
+  return true;
+}
 DetectionDiff ResultManager::CalculateDiffStatis(
     const vision::DetectionResult& lhs, const vision::DetectionResult& rhs,
     const float& score_threshold) {
@@ -643,6 +691,31 @@ SegmentationDiff ResultManager::CalculateDiffStatis(
   return diff;
 }
+OCRDetDiff ResultManager::CalculateDiffStatis(
+    const std::vector<std::array<int, 8>>& lhs,
+    const std::vector<std::array<int, 8>>& rhs) {
+  const int boxes_nums = std::min(lhs.size(), rhs.size());
+  std::vector<std::array<int, 8>> lhs_sort = lhs;
+  std::vector<std::array<int, 8>> rhs_sort = rhs;
+  // lex sort by x(w) & y(h)
+  vision::utils::LexSortOCRDetResultByXY(&lhs_sort);
+  vision::utils::LexSortOCRDetResultByXY(&rhs_sort);
+  // get value diff
+  const int boxes_num = std::min(lhs_sort.size(), rhs_sort.size());
+  std::vector<float> boxes_diff;
+  for (int i = 0; i < boxes_num; ++i) {
+    for (int j = 0; j < 8; ++j) {
+      boxes_diff.push_back(lhs_sort[i][j] - rhs_sort[i][j]);
+    }
+  }
+  OCRDetDiff diff;
+  CalculateStatisInfo<float>(boxes_diff.data(), boxes_diff.size(),
+                             &(diff.boxes.mean), &(diff.boxes.max),
+                             &(diff.boxes.min));
+  return diff;
+}
 #endif  // ENABLE_VISION
 #endif  // ENABLE_BENCHMARK
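Note that CalculateDiffStatis sorts both box lists with LexSortOCRDetResultByXY before differencing, so backend-dependent box ordering does not show up as a spurious diff. A standalone sketch of such a lexicographic sort, assuming the key is the first vertex's y then x (FastDeploy's helper may use a different key):

#include <algorithm>
#include <array>
#include <vector>

// Assumed ordering: by y of the first vertex, then x; the real
// vision::utils::LexSortOCRDetResultByXY may differ in detail.
static void LexSortBoxes(std::vector<std::array<int, 8>>* boxes) {
  std::sort(boxes->begin(), boxes->end(),
            [](const std::array<int, 8>& a, const std::array<int, 8>& b) {
              if (a[1] != b[1]) return a[1] < b[1];  // y first
              return a[0] < b[0];                    // then x
            });
}

int main() {
  std::vector<std::array<int, 8>> boxes = {
      {50, 40, 90, 40, 90, 60, 50, 60}, {10, 20, 80, 20, 80, 45, 10, 45}};
  LexSortBoxes(&boxes);  // the box starting at y=20 now comes first
}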


@@ -122,6 +122,10 @@ struct FASTDEPLOY_DECL SegmentationDiff: public BaseDiff {
   EvalStatis labels;
 };
+struct FASTDEPLOY_DECL OCRDetDiff: public BaseDiff {
+  EvalStatis boxes;
+};
 #endif  // ENABLE_VISION
 #endif  // ENABLE_BENCHMARK
@@ -148,6 +152,10 @@ struct FASTDEPLOY_DECL ResultManager {
                                     const std::string& path);
   static bool LoadSegmentationResult(vision::SegmentationResult* res,
                                      const std::string& path);
+  static bool SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
+                               const std::string& path);
+  static bool LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
+                               const std::string& path);
   /// Calculate diff value between two basic results.
   static DetectionDiff CalculateDiffStatis(const vision::DetectionResult& lhs,
                                            const vision::DetectionResult& rhs,
@@ -157,6 +165,9 @@ struct FASTDEPLOY_DECL ResultManager {
   static SegmentationDiff CalculateDiffStatis(
       const vision::SegmentationResult& lhs,
       const vision::SegmentationResult& rhs);
+  static OCRDetDiff CalculateDiffStatis(
+      const std::vector<std::array<int, 8>>& lhs,
+      const std::vector<std::array<int, 8>>& rhs);
 #endif  // ENABLE_VISION
 #endif  // ENABLE_BENCHMARK
 };


@@ -13,23 +13,22 @@
 // limitations under the License.
 #include "fastdeploy/vision/classification/contrib/resnet.h"
-#include "fastdeploy/vision/utils/utils.h"
 #include "fastdeploy/utils/perf.h"
+#include "fastdeploy/vision/utils/utils.h"
 namespace fastdeploy {
 namespace vision {
 namespace classification {
-ResNet::ResNet(const std::string& model_file,
-               const std::string& params_file,
+ResNet::ResNet(const std::string& model_file, const std::string& params_file,
                const RuntimeOption& custom_option,
                const ModelFormat& model_format) {
   // In constructor, the 3 steps below are necessary.
   // 1. set the Backend 2. set RuntimeOption 3. call Initialize()
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER};
     valid_gpu_backends = {Backend::PDINFER};
@@ -42,7 +41,6 @@ ResNet::ResNet(const std::string& model_file,
 }
 bool ResNet::Initialize() {
   // In this function, the 3 steps below are necessary.
   // 1. assign values to the global variables 2. call InitRuntime()
@@ -57,14 +55,15 @@ bool ResNet::Initialize() {
   return true;
 }
 bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
-  // In this function, the preprocess need be implemented according to the original Repos,
-  // The result of preprocess has to be saved in FDTensor variable, because the input of Infer() need to be std::vector<FDTensor>.
-  // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into FDTensor variable.
-  if (mat->Height()!=size[0] || mat->Width()!=size[1]){
+  // In this function, the preprocess need be implemented according to the
+  // original Repos,
+  // The result of preprocess has to be saved in FDTensor variable, because the
+  // input of Infer() need to be std::vector<FDTensor>.
+  // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into
+  // FDTensor variable.
+  if (mat->Height() != size[0] || mat->Width() != size[1]) {
     int interp = cv::INTER_LINEAR;
     Resize::Run(mat, size[1], size[0], -1, -1, interp);
   }
@@ -75,20 +74,23 @@ bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
-bool ResNet::Postprocess(FDTensor& infer_result,
-                         ClassifyResult* result, int topk) {
-  // In this function, the postprocess need be implemented according to the original Repos,
-  // Finally the reslut of postprocess should be saved in ClassifyResult variable.
-  // 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult variable.
+bool ResNet::Postprocess(FDTensor& infer_result, ClassifyResult* result,
+                         int topk) {
+  // In this function, the postprocess need be implemented according to the
+  // original Repos,
+  // Finally the reslut of postprocess should be saved in ClassifyResult
+  // variable.
+  // 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult
+  // variable.
   int num_classes = infer_result.shape[1];
   function::Softmax(infer_result, &infer_result);
-  const float* infer_result_buffer = reinterpret_cast<float*>(infer_result.Data());
+  const float* infer_result_buffer =
+      reinterpret_cast<float*>(infer_result.Data());
   topk = std::min(num_classes, topk);
   result->label_ids =
       utils::TopKIndices(infer_result_buffer, num_classes, topk);
@@ -100,8 +102,8 @@ bool ResNet::Postprocess(FDTensor& infer_result,
 }
 bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
-  // In this function, the Preprocess(), Infer(), and Postprocess() are called sequentially.
+  // In this function, the Preprocess(), Infer(), and Postprocess() are called
+  // sequentially.
   Mat mat(*im);
   std::vector<FDTensor> processed_data(1);
@@ -128,7 +130,6 @@ bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
   return true;
 }
 }  // namespace classification
 }  // namespace vision
 }  // namespace fastdeploy
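Many of the comment fixes in this and the following files replace "reshape to n, h, w, c" with "n, c, h, w": the insert happens after HWC2CHW has already transposed the tensor to CHW, so prepending the batch dimension yields NCHW. A toy illustration of that shape bookkeeping:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Shape after HWC2CHW: {c, h, w}.
  std::vector<int64_t> shape = {3, 224, 224};
  // Prepending 1 gives {n, c, h, w}, which is what the corrected
  // comments in this commit now say.
  shape.insert(shape.begin(), 1);
  for (auto d : shape) std::cout << d << ' ';
  std::cout << std::endl;  // prints: 1 3 224 224
}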


@@ -20,18 +20,19 @@ namespace vision {
 namespace classification {
 YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() {
   size_ = {224, 224};  //{h,w}
 }
-bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5ClsPreprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
   // process after image load
   double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()),
                                              static_cast<float>(mat->Width()));
   // yolov5cls's preprocess steps
   // 1. CenterCrop
@@ -54,20 +55,22 @@ bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv5ClsPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5ClsPreprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -20,26 +20,27 @@ namespace vision {
 namespace detection {
 FastestDetPreprocessor::FastestDetPreprocessor() {
   size_ = {352, 352};  //{h,w}
 }
-bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool FastestDetPreprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
   // process after image load
   double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()),
                                              static_cast<float>(mat->Width()));
   // fastestdet's preprocess steps
   // 1. resize
   // 2. convert_and_permute(swap_rb=false)
-  Resize::Run(mat, size_[0], size_[1]); //resize
+  Resize::Run(mat, size_[0], size_[1]);  // resize
   std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
   std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  //convert to float and HWC2CHW
+  // convert to float and HWC2CHW
   ConvertAndPermute::Run(mat, alpha, beta, false);
   // Record output shape of preprocessed image
@@ -47,20 +48,22 @@ bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool FastestDetPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool FastestDetPreprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -117,8 +117,8 @@ NanoDetPlus::NanoDetPlus(const std::string& model_file,
                          const RuntimeOption& custom_option,
                          const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -182,7 +182,7 @@ bool NanoDetPlus::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }

fastdeploy/vision/detection/contrib/scaledyolov4.cc (6 changes, Executable file → Normal file)

@@ -62,8 +62,8 @@ ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file,
                            const RuntimeOption& custom_option,
                            const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER};
     valid_gpu_backends = {Backend::PDINFER};
@@ -144,7 +144,7 @@ bool ScaledYOLOv4::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }

fastdeploy/vision/detection/contrib/yolor.cc (6 changes, Executable file → Normal file)

@@ -61,8 +61,8 @@ YOLOR::YOLOR(const std::string& model_file, const std::string& params_file,
              const RuntimeOption& custom_option,
              const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER};
     valid_gpu_backends = {Backend::PDINFER};
@@ -142,7 +142,7 @@ bool YOLOR::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -64,8 +64,9 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) {
   }
 }
-bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5Preprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv5Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5Preprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -195,7 +195,7 @@ bool YOLOv5Lite::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -253,7 +253,7 @@ bool YOLOv5Lite::CudaPreprocess(
   output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;


@@ -64,8 +64,9 @@ void YOLOv5SegPreprocessor::LetterBox(FDMat* mat) {
   }
 }
-bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5SegPreprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv5SegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5SegPreprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -168,7 +168,7 @@ bool YOLOv6::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -226,7 +226,7 @@ bool YOLOv6::CudaPreprocess(
   output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;


@@ -64,8 +64,9 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) {
   }
 }
-bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv7Preprocessor::Preprocess(
+    FDMat* mat, FDTensor* output,
+    std::map<std::string, std::array<float, 2>>* im_info) {
   // Record the shape of image and the shape of preprocessed image
   (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
                                static_cast<float>(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }
-bool YOLOv7Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv7Preprocessor::Run(
+    std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+    std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
   if (images->size() == 0) {
-    FDERROR << "The size of input images should be greater than 0." << std::endl;
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
     return false;
   }
   ims_info->resize(images->size());
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   std::vector<FDTensor> tensors(images->size());
   for (size_t i = 0; i < images->size(); ++i) {
     if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
       FDERROR << "Failed to preprocess input image." << std::endl;


@@ -137,7 +137,7 @@ bool YOLOv7End2EndORT::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -235,7 +235,8 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result,
     return false;
   }
-  if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) {
+  if (!Postprocess(reused_output_tensors_[0], result, im_info,
+                   conf_threshold)) {
     FDERROR << "Failed to post process." << std::endl;
     return false;
   }


@@ -169,7 +169,7 @@ bool YOLOv7End2EndTRT::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
@@ -227,7 +227,7 @@ bool YOLOv7End2EndTRT::CudaPreprocess(
   output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
                           input_tensor_cuda_buffer_device_);
   output->device = Device::GPU;
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 #else
   FDERROR << "CUDA src code was not enabled." << std::endl;


@@ -83,7 +83,7 @@ bool YOLOv8Preprocessor::Preprocess(
                                 static_cast<float>(mat->Width())};
   mat->ShareWithTensor(output);
-  output->ExpandDim(0);  // reshape to n, h, w, c
+  output->ExpandDim(0);  // reshape to n, c, h, w
   return true;
 }


@@ -129,7 +129,7 @@ bool YOLOX::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -70,7 +70,7 @@ bool FaceLandmark1000::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -22,13 +22,12 @@ namespace vision {
 namespace facealign {
-PFLD::PFLD(const std::string& model_file,
-           const std::string& params_file,
+PFLD::PFLD(const std::string& model_file, const std::string& params_file,
            const RuntimeOption& custom_option,
            const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -71,12 +70,13 @@ bool PFLD::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }
-bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
-                       const std::map<std::string, std::array<int, 2>>& im_info) {
+bool PFLD::Postprocess(
+    FDTensor& infer_result, FaceAlignmentResult* result,
+    const std::map<std::string, std::array<int, 2>>& im_info) {
   FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
   if (infer_result.dtype != FDDataType::FP32) {
     FDERROR << "Only support post process with float32 data." << std::endl;
@@ -84,8 +84,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
   }
   auto iter_in = im_info.find("input_shape");
-  FDASSERT(iter_in != im_info.end(),
-           "Cannot find input_shape from im_info.");
+  FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
   int in_h = iter_in->second[0];
   int in_w = iter_in->second[1];
@@ -97,8 +96,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
     x = std::min(std::max(0.f, x), 1.0f);
     y = std::min(std::max(0.f, y), 1.0f);
     // decode landmarks (default 106 landmarks)
-    result->landmarks.emplace_back(
-        std::array<float, 2>{x * in_w, y * in_h});
+    result->landmarks.emplace_back(std::array<float, 2>{x * in_w, y * in_h});
   }
   return true;


@@ -632,7 +632,7 @@ bool PIPNet::Preprocess(Mat* mat, FDTensor* output,
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -81,8 +81,8 @@ RetinaFace::RetinaFace(const std::string& model_file,
                        const RuntimeOption& custom_option,
                        const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -145,7 +145,7 @@ bool RetinaFace::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -27,7 +27,7 @@ UltraFace::UltraFace(const std::string& model_file,
                      const RuntimeOption& custom_option,
                      const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
@@ -90,7 +90,7 @@ bool UltraFace::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -64,8 +64,8 @@ YOLOv5Face::YOLOv5Face(const std::string& model_file,
                        const RuntimeOption& custom_option,
                        const ModelFormat& model_format) {
   if (model_format == ModelFormat::ONNX) {
     valid_cpu_backends = {Backend::ORT};
     valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
     valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -115,11 +115,11 @@ bool YOLOv5Face::Preprocess(
   // process after image load
   float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
                          size[0] * 1.0f / static_cast<float>(mat->Width()));
 #ifndef __ANDROID__
   // Because of the low CPU performance on the Android device,
   // we decided to hide this extra resize. It won't make much
   // difference to the final result.
   if (std::fabs(ratio - 1.0f) > 1e-06) {
     int interp = cv::INTER_LINEAR;
     if (ratio > 1.0) {
       interp = cv::INTER_LINEAR;
@@ -128,7 +128,7 @@ bool YOLOv5Face::Preprocess(
     int resize_w = int(round(static_cast<float>(mat->Width()) * ratio));
     Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
   }
 #endif
   // yolov5face's preprocess steps
   // 1. letterbox
   // 2. BGR->RGB
@@ -149,9 +149,9 @@ bool YOLOv5Face::Preprocess(
   HWC2CHW::Run(mat);
   Cast::Run(mat, "float");
   mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
   return true;
 }


@@ -32,10 +32,12 @@ Yolov7FacePreprocessor::Yolov7FacePreprocessor() {
max_wh_ = 7680.0; max_wh_ = 7680.0;
} }
bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, bool Yolov7FacePreprocessor::Run(
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) { if (images->size() == 0) {
FDERROR << "The size of input images should be greater than 0." << std::endl; FDERROR << "The size of input images should be greater than 0."
<< std::endl;
return false; return false;
} }
ims_info->resize(images->size()); ims_info->resize(images->size());
@@ -56,8 +58,9 @@ bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
return true; return true;
} }
bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output, bool Yolov7FacePreprocessor::Preprocess(
std::map<std::string, std::array<float, 2>>* im_info){ FDMat* mat, FDTensor* output,
std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image // Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast<float>(mat->Height()), (*im_info)["input_shape"] = {static_cast<float>(mat->Height()),
static_cast<float>(mat->Width())}; static_cast<float>(mat->Width())};
@@ -75,13 +78,13 @@ bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output,
static_cast<float>(mat->Width())}; static_cast<float>(mat->Width())};
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }
void Yolov7FacePreprocessor::LetterBox(FDMat* mat) { void Yolov7FacePreprocessor::LetterBox(FDMat* mat) {
float scale = float scale =
std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width());
if (!is_scale_up_) { if (!is_scale_up_) {
scale = std::min(scale, 1.0f); scale = std::min(scale, 1.0f);
} }
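
For context, the letterbox step scales by the smaller ratio and pads the remainder. A standalone sketch with plain OpenCV; the 114 border value and centered padding are common YOLO-style defaults assumed here, not taken from this file:

#include <algorithm>
#include <opencv2/opencv.hpp>

// Scale by the min ratio, then pad to the target size with a constant border.
cv::Mat LetterBox(const cv::Mat& img, int target_w, int target_h) {
  float scale = std::min(target_h / static_cast<float>(img.rows),
                         target_w / static_cast<float>(img.cols));
  cv::Mat resized;
  cv::resize(img, resized,
             cv::Size(static_cast<int>(img.cols * scale),
                      static_cast<int>(img.rows * scale)));
  int pad_w = target_w - resized.cols;
  int pad_h = target_h - resized.rows;
  cv::Mat out;
  cv::copyMakeBorder(resized, out, pad_h / 2, pad_h - pad_h / 2, pad_w / 2,
                     pad_w - pad_w / 2, cv::BORDER_CONSTANT,
                     cv::Scalar(114, 114, 114));
  return out;
}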


@@ -26,8 +26,7 @@ AdaFacePreprocessor::AdaFacePreprocessor() {
permute_ = true; permute_ = true;
} }
bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) { bool AdaFacePreprocessor::Preprocess(FDMat* mat, FDTensor* output) {
// face recognition model's preprocess steps in insightface // face recognition model's preprocess steps in insightface
// reference: insightface/recognition/arcface_torch/inference.py // reference: insightface/recognition/arcface_torch/inference.py
// 1. Resize // 1. Resize
@@ -48,14 +47,15 @@ bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) {
Cast::Run(mat, "float"); Cast::Run(mat, "float");
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }
bool AdaFacePreprocessor::Run(std::vector<FDMat>* images, bool AdaFacePreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs) { std::vector<FDTensor>* outputs) {
if (images->empty()) { if (images->empty()) {
FDERROR << "The size of input images should be greater than 0." << std::endl; FDERROR << "The size of input images should be greater than 0."
<< std::endl;
return false; return false;
} }
FDASSERT(images->size() == 1, "Only support batch = 1 now."); FDASSERT(images->size() == 1, "Only support batch = 1 now.");
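
A hedged usage sketch for the reflowed Run(): pass exactly one image per call, per the FDASSERT. WrapMat and the faceid namespace are assumptions based on FastDeploy's conventions, not confirmed by this diff:

#include <vector>
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

int main() {
  cv::Mat cv_img = cv::imread("face.jpg");  // placeholder path
  std::vector<fastdeploy::vision::FDMat> images;
  images.push_back(fastdeploy::vision::WrapMat(cv_img));  // assumed helper
  std::vector<fastdeploy::FDTensor> outputs;
  fastdeploy::vision::faceid::AdaFacePreprocessor preprocessor;
  // Run() rejects empty input and asserts batch == 1.
  if (!preprocessor.Run(&images, &outputs)) return -1;
  return 0;
}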


@@ -50,7 +50,7 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat* mat,
} }
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }


@@ -22,13 +22,12 @@ namespace vision {
namespace headpose { namespace headpose {
FSANet::FSANet(const std::string& model_file, FSANet::FSANet(const std::string& model_file, const std::string& params_file,
const std::string& params_file,
const RuntimeOption& custom_option, const RuntimeOption& custom_option,
const ModelFormat& model_format) { const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) { if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
valid_gpu_backends = {Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else { } else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -52,7 +51,7 @@ bool FSANet::Initialize() {
} }
bool FSANet::Preprocess(Mat* mat, FDTensor* output, bool FSANet::Preprocess(Mat* mat, FDTensor* output,
std::map<std::string, std::array<int, 2>>* im_info) { std::map<std::string, std::array<int, 2>>* im_info) {
// Resize // Resize
int resize_w = size[0]; int resize_w = size[0];
int resize_h = size[1]; int resize_h = size[1];
@@ -62,7 +61,8 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
// Normalize // Normalize
std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f};
std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, -127.5f / 128.0f}; std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f,
-127.5f / 128.0f};
Convert::Run(mat, alpha, beta); Convert::Run(mat, alpha, beta);
// Record output shape of preprocessed image // Record output shape of preprocessed image
@@ -72,12 +72,13 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
Cast::Run(mat, "float"); Cast::Run(mat, "float");
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true; return true;
} }
bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result, bool FSANet::Postprocess(
const std::map<std::string, std::array<int, 2>>& im_info) { FDTensor& infer_result, HeadPoseResult* result,
const std::map<std::string, std::array<int, 2>>& im_info) {
FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
if (infer_result.dtype != FDDataType::FP32) { if (infer_result.dtype != FDDataType::FP32) {
FDERROR << "Only support post process with float32 data." << std::endl; FDERROR << "Only support post process with float32 data." << std::endl;
@@ -85,8 +86,7 @@ bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result,
} }
auto iter_in = im_info.find("input_shape"); auto iter_in = im_info.find("input_shape");
FDASSERT(iter_in != im_info.end(), FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
"Cannot find input_shape from im_info.");
int in_h = iter_in->second[0]; int in_h = iter_in->second[0];
int in_w = iter_in->second[1]; int in_w = iter_in->second[1];
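
The reflowed alpha/beta above encode per-channel normalization y = alpha * x + beta, i.e. (x - 127.5) / 128 for each channel. A standalone check of the arithmetic:

#include <cstdio>
#include <initializer_list>

int main() {
  const float alpha = 1.0f / 128.0f;
  const float beta = -127.5f / 128.0f;
  for (float x : {0.0f, 127.5f, 255.0f}) {
    // alpha * x + beta is identical to (x - 127.5f) / 128.0f
    std::printf("%5.1f -> %+.5f\n", x, alpha * x + beta);
  }
  return 0;
}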


@@ -77,7 +77,7 @@ bool MODNet::Preprocess(Mat* mat, FDTensor* output,
Cast::Run(mat, "float"); Cast::Run(mat, "float");
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true; return true;
} }
@@ -106,8 +106,8 @@ bool MODNet::Postprocess(
float* alpha_ptr = static_cast<float*>(alpha_tensor.Data()); float* alpha_ptr = static_cast<float*>(alpha_tensor.Data());
// cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr);
// Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy.
Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
alpha_ptr); // ref-only, zero copy. alpha_ptr); // ref-only, zero copy.
if ((out_h != ipt_h) || (out_w != ipt_w)) { if ((out_h != ipt_h) || (out_w != ipt_w)) {
Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1); Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1);
} }
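
Mat::Create(out_h, out_w, 1, FDDataType::FP32, alpha_ptr) wraps the tensor's buffer without copying. The same zero-copy idea sketched with plain OpenCV (sizes invented for illustration):

#include <opencv2/opencv.hpp>
#include <vector>

int main() {
  const int h = 4, w = 4;
  std::vector<float> buf(h * w, 0.5f);
  // Wrap the existing buffer; no pixel data is copied here.
  cv::Mat alpha(h, w, CV_32FC1, buf.data());
  cv::Mat resized;
  cv::resize(alpha, resized, cv::Size(8, 8));  // resize allocates a new buffer
  return 0;
}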

fastdeploy/vision/matting/contrib/rvm.cc Executable file → Normal file

@@ -74,7 +74,7 @@ bool RobustVideoMatting::Preprocess(
(*im_info)["output_shape"] = {mat->Height(), mat->Width()}; (*im_info)["output_shape"] = {mat->Height(), mat->Width()};
mat->ShareWithTensor(output); mat->ShareWithTensor(output);
output->ExpandDim(0); // reshape to n, h, w, c output->ExpandDim(0); // reshape to n, c, h, w
return true; return true;
} }
@@ -118,16 +118,16 @@ bool RobustVideoMatting::Postprocess(
// for alpha // for alpha
float* alpha_ptr = static_cast<float*>(alpha.Data()); float* alpha_ptr = static_cast<float*>(alpha.Data());
Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
alpha_ptr); // ref-only, zero copy. alpha_ptr); // ref-only, zero copy.
if ((out_h != in_h) || (out_w != in_w)) { if ((out_h != in_h) || (out_w != in_w)) {
Resize::Run(&alpha_resized, in_w, in_h, -1, -1); Resize::Run(&alpha_resized, in_w, in_h, -1, -1);
} }
// for foreground // for foreground
float* fgr_ptr = static_cast<float*>(fgr.Data()); float* fgr_ptr = static_cast<float*>(fgr.Data());
Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
fgr_ptr); // ref-only, zero copy. fgr_ptr); // ref-only, zero copy.
if ((out_h != in_h) || (out_w != in_w)) { if ((out_h != in_h) || (out_w != in_w)) {
Resize::Run(&fgr_resized, in_w, in_h, -1, -1); Resize::Run(&fgr_resized, in_w, in_h, -1, -1);
} }

fastdeploy/vision/utils/sort_det_res.cc Normal file → Executable file

@@ -77,27 +77,42 @@ void SortDetectionResult(DetectionResult* result) {
MergeSort(result, low, high); MergeSort(result, low, high);
} }
bool LexSortByXYCompare(const std::array<float, 4>& box_a, template <typename T>
const std::array<float, 4>& box_b) { bool LexSortByXYCompare(const std::array<T, 4>& box_a,
const std::array<T, 4>& box_b) {
// WARN: The status should be false if (a==b). // WARN: The status should be false if (a==b).
// https://blog.csdn.net/xxxwrq/article/details/83080640 // https://blog.csdn.net/xxxwrq/article/details/83080640
auto is_equal = [](const float& a, const float& b) -> bool { auto is_equal = [](const T& a, const T& b) -> bool {
return std::abs(a - b) < 1e-6f; return std::abs(a - b) < 1e-6f;
}; };
const float& x0_a = box_a[0]; const T& x0_a = box_a[0];
const float& y0_a = box_a[1]; const T& y0_a = box_a[1];
const float& x0_b = box_b[0]; const T& x0_b = box_b[0];
const float& y0_b = box_b[1]; const T& y0_b = box_b[1];
if (is_equal(x0_a, x0_b)) { if (is_equal(x0_a, x0_b)) {
return is_equal(y0_a, y0_b) ? false : y0_a > y0_b; return is_equal(y0_a, y0_b) ? false : y0_a > y0_b;
} }
return x0_a > x0_b; return x0_a > x0_b;
} }
// Specialization for int dtype: exact comparison, no float epsilon needed
template <>
bool LexSortByXYCompare(const std::array<int, 4>& box_a,
const std::array<int, 4>& box_b) {
const int& x0_a = box_a[0];
const int& y0_a = box_a[1];
const int& x0_b = box_b[0];
const int& y0_b = box_b[1];
if (x0_a == x0_b) {
return y0_a == y0_b ? false : y0_a > y0_b;
}
return x0_a > x0_b;
}
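
The specialization compares ints exactly, avoiding the float epsilon of the generic template. A self-contained usage sketch with std::sort; the lambda restates the int specialization's ordering:

#include <algorithm>
#include <array>
#include <vector>

int main() {
  std::vector<std::array<int, 4>> boxes = {
      {10, 20, 30, 40}, {10, 5, 30, 40}, {50, 0, 70, 20}};
  // Same ordering as LexSortByXYCompare<int>: larger x0 first,
  // ties broken by larger y0, equal boxes compare false.
  auto cmp = [](const std::array<int, 4>& a, const std::array<int, 4>& b) {
    if (a[0] == b[0]) return a[1] == b[1] ? false : a[1] > b[1];
    return a[0] > b[0];
  };
  std::sort(boxes.begin(), boxes.end(), cmp);
  return 0;
}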
void ReorderDetectionResultByIndices(DetectionResult* result, void ReorderDetectionResultByIndices(DetectionResult* result,
const std::vector<size_t>& indices) { const std::vector<size_t>& indices) {
// reorder boxes, scores, label_ids, masks // reorder boxes, scores, label_ids, masks
DetectionResult backup = (*result); // move DetectionResult backup = (*result);
const bool contain_masks = backup.contain_masks; const bool contain_masks = backup.contain_masks;
const int boxes_num = backup.boxes.size(); const int boxes_num = backup.boxes.size();
result->Clear(); result->Clear();
@@ -122,7 +137,7 @@ void ReorderDetectionResultByIndices(DetectionResult* result,
} }
void LexSortDetectionResultByXY(DetectionResult* result) { void LexSortDetectionResultByXY(DetectionResult* result) {
if (result->boxes.size() == 0) { if (result->boxes.empty()) {
return; return;
} }
std::vector<size_t> indices; std::vector<size_t> indices;
@@ -138,6 +153,35 @@ void LexSortDetectionResultByXY(DetectionResult* result) {
ReorderDetectionResultByIndices(result, indices); ReorderDetectionResultByIndices(result, indices);
} }
void LexSortOCRDetResultByXY(std::vector<std::array<int, 8>>* result) {
if (result->empty()) {
return;
}
std::vector<size_t> indices;
indices.resize(result->size());
std::vector<std::array<int, 4>> boxes;
boxes.resize(result->size());
for (size_t i = 0; i < result->size(); ++i) {
indices[i] = i;
// Reduce each 4-point box to its first and fourth points for LexSort
boxes[i] = {(*result)[i][0], (*result)[i][1], (*result)[i][6],
(*result)[i][7]};
}
// lex sort by x(w) then y(h)
std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) {
return LexSortByXYCompare(boxes[a], boxes[b]);
});
// reorder boxes
std::vector<std::array<int, 8>> backup = (*result);
const int boxes_num = backup.size();
result->clear();
result->resize(boxes_num);
// boxes
for (int i = 0; i < boxes_num; ++i) {
(*result)[i] = backup[indices[i]];
}
}
} // namespace utils } // namespace utils
} // namespace vision } // namespace vision
} // namespace fastdeploy } // namespace fastdeploy
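
A usage sketch for the new sorter; the quads are invented, each stored as x0,y0 … x3,y3:

#include <array>
#include <vector>
#include "fastdeploy/vision/utils/utils.h"

int main() {
  std::vector<std::array<int, 8>> quads = {
      {10, 10, 60, 10, 60, 30, 10, 30},
      {10, 50, 60, 50, 60, 70, 10, 70}};
  fastdeploy::vision::utils::LexSortOCRDetResultByXY(&quads);
  // quads are now ordered by x0, then y0, per the comparator above.
  return 0;
}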

fastdeploy/vision/utils/utils.h Normal file → Executable file

@@ -67,8 +67,11 @@ void NMS(FaceDetectionResult* result, float iou_threshold = 0.5);
/// Sort DetectionResult/FaceDetectionResult by score /// Sort DetectionResult/FaceDetectionResult by score
FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result); FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result);
FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result); FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result);
/// Lex Sort DetectionResult/FaceDetectionResult by x(w) & y(h) axis /// Lex Sort DetectionResult by x(w) & y(h) axis
FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result); FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result);
/// Lex Sort OCRDet Result by x(w) & y(h) axis
FASTDEPLOY_DECL void LexSortOCRDetResultByXY(
std::vector<std::array<int, 8>>* result);
/// L2 Norm / cosine similarity (for face recognition, ...) /// L2 Norm / cosine similarity (for face recognition, ...)
FASTDEPLOY_DECL std::vector<float> FASTDEPLOY_DECL std::vector<float>