diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt index f839eb228..a6f0b87c2 100755 --- a/benchmark/cpp/CMakeLists.txt +++ b/benchmark/cpp/CMakeLists.txt @@ -13,7 +13,9 @@ add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc) add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc) add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc) add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc) -add_executable(benchmark_ppocr ${PROJECT_SOURCE_DIR}/benchmark_ppocr.cc) +add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc) +add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc) +add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread) @@ -21,12 +23,16 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread) - target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread) else() target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags) - target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags) endif() diff --git a/benchmark/cpp/benchmark_ppocr.cc b/benchmark/cpp/benchmark_ppocr.cc deleted file mode 100755 index e81080c54..000000000 --- a/benchmark/cpp/benchmark_ppocr.cc +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "flags.h" -#include "macros.h" -#include "option.h" - -// Only for ppocr -DEFINE_string(det_model, "", "Path of Detection model of PPOCR."); -DEFINE_string(cls_model, "", "Path of Classification model of PPOCR."); -DEFINE_string(rec_model, "", "Path of Recognization model of PPOCR."); -DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR."); -DEFINE_string(image_rec, "", "Path of Recognization img file of PPOCR."); - -int main(int argc, char* argv[]) { -#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) - // Initialization - auto option = fastdeploy::RuntimeOption(); - if (!CreateRuntimeOption(&option, argc, argv, true)) { - return -1; - } - auto im = cv::imread(FLAGS_image); - auto im_rec = cv::imread(FLAGS_image_rec); - // Detection Model - auto det_model_file = - FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdmodel"; - auto det_params_file = - FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdiparams"; - // Classification Model - auto cls_model_file = - FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdmodel"; - auto cls_params_file = - FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdiparams"; - // Recognition Model - auto rec_model_file = - FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdmodel"; - auto rec_params_file = - FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdiparams"; - auto rec_label_file = FLAGS_rec_label_file; - if (FLAGS_backend == "paddle_trt") { - option.paddle_infer_option.collect_trt_shape = true; - } - auto det_option = option; - auto cls_option = option; - auto rec_option = option; - if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { - det_option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640}, - {1, 3, 960, 960}); - cls_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, - {8, 3, 48, 1024}); - rec_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, - {8, 3, 48, 2304}); - } - auto det_model = fastdeploy::vision::ocr::DBDetector( - det_model_file, det_params_file, det_option); - auto cls_model = fastdeploy::vision::ocr::Classifier( - cls_model_file, cls_params_file, cls_option); - auto rec_model = fastdeploy::vision::ocr::Recognizer( - rec_model_file, rec_params_file, rec_label_file, rec_option); - // Only for runtime - if (FLAGS_profile_mode == "runtime") { - std::vector> boxes_result; - std::cout << "====Detection model====" << std::endl; - BENCHMARK_MODEL(det_model, det_model.Predict(im, &boxes_result)); - int32_t cls_label; - float cls_score; - std::cout << "====Classification model====" << std::endl; - BENCHMARK_MODEL(cls_model, - cls_model.Predict(im_rec, &cls_label, &cls_score)); - std::string text; - float rec_score; - std::cout << "====Recognization model====" << std::endl; - BENCHMARK_MODEL(rec_model, rec_model.Predict(im_rec, &text, &rec_score)); - } - auto model_ppocrv3 = - fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model); - fastdeploy::vision::OCRResult res; - if (FLAGS_profile_mode == "end2end") { - BENCHMARK_MODEL(model_ppocrv3, model_ppocrv3.Predict(im, &res)) - } - auto vis_im = fastdeploy::vision::VisOcr(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; -#endif - return 0; -} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_ppocr_cls.cc b/benchmark/cpp/benchmark_ppocr_cls.cc new file mode 100644 index 000000000..0ddd939bc --- /dev/null +++ b/benchmark/cpp/benchmark_ppocr_cls.cc @@ -0,0 +1,57 @@ +// 
diff --git a/benchmark/cpp/benchmark_ppocr_cls.cc b/benchmark/cpp/benchmark_ppocr_cls.cc new file mode 100644 index 000000000..0ddd939bc --- /dev/null +++ b/benchmark/cpp/benchmark_ppocr_cls.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "flags.h" +#include "macros.h" +#include "option.h" + +int main(int argc, char* argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + // Classification Model + auto cls_model_file = FLAGS_model + sep + "inference.pdmodel"; + auto cls_params_file = FLAGS_model + sep + "inference.pdiparams"; + if (FLAGS_backend == "paddle_trt") { + option.paddle_infer_option.collect_trt_shape = true; + } + if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { + option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, + {8, 3, 48, 1024}); + } + auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier( + cls_model_file, cls_params_file, option); + int32_t res_label; + float res_score; + // Run at least once + model_ppocr_cls.Predict(im, &res_label, &res_score); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + int32_t res_label_expect = 0; + float res_score_expect = 1.0; + // Calculate diff between two results. + auto ppocr_cls_label_diff = res_label - res_label_expect; + auto ppocr_cls_score_diff = res_score - res_score_expect; + std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl; + std::cout << "PPOCR Cls score diff: " << std::abs(ppocr_cls_score_diff) + << std::endl; + BENCHMARK_MODEL(model_ppocr_cls, + model_ppocr_cls.Predict(im, &res_label, &res_score)); +#endif + return 0; +} \ No newline at end of file
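Both TensorRT paths above pin a dynamic-shape profile for the input tensor "x" before the model is built: SetShape takes a minimum, an optimum, and a maximum NCHW shape, and TensorRT optimizes for the middle one. Restated from the classifier code above (the detector below uses its own triple):

```c++
// SetShape(input_name, min_shape, opt_shape, max_shape): TensorRT builds
// its dynamic-shape optimization profile from these three NCHW shapes.
// These are the classifier values from benchmark_ppocr_cls.cc above.
option.trt_option.SetShape("x", {1, 3, 48, 10},   // smallest accepted input
                           {4, 3, 48, 320},       // shape TensorRT tunes for
                           {8, 3, 48, 1024});     // largest accepted input
```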
+ +#include "flags.h" +#include "macros.h" +#include "option.h" + +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + +int main(int argc, char* argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + // Detection Model + auto det_model_file = FLAGS_model + sep + "inference.pdmodel"; + auto det_params_file = FLAGS_model + sep + "inference.pdiparams"; + if (FLAGS_backend == "paddle_trt") { + option.paddle_infer_option.collect_trt_shape = true; + } + if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { + option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640}, + {1, 3, 960, 960}); + } + auto model_ppocr_det = + vision::ocr::DBDetector(det_model_file, det_params_file, option); + std::vector> res; + // Run once at least + model_ppocr_det.Predict(im, &res); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + // Save result to -> disk. + std::string ppocr_det_result_path = "ppocr_det_result.txt"; + benchmark::ResultManager::SaveOCRDetResult(res, ppocr_det_result_path); + // Load result from <- disk. + std::vector> res_loaded; + benchmark::ResultManager::LoadOCRDetResult(&res_loaded, + ppocr_det_result_path); + // Calculate diff between two results. + auto ppocr_det_diff = + benchmark::ResultManager::CalculateDiffStatis(res, res_loaded); + std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean + << ", max=" << ppocr_det_diff.boxes.max + << ", min=" << ppocr_det_diff.boxes.min << std::endl; + BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res)); +#endif + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_ppocr_rec.cc b/benchmark/cpp/benchmark_ppocr_rec.cc new file mode 100644 index 000000000..71bb6b353 --- /dev/null +++ b/benchmark/cpp/benchmark_ppocr_rec.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "flags.h" +#include "macros.h" +#include "option.h" + +DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR."); + +int main(int argc, char* argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + // Recognition Model + auto rec_model_file = FLAGS_model + sep + "inference.pdmodel"; + auto rec_params_file = FLAGS_model + sep + "inference.pdiparams"; + if (FLAGS_backend == "paddle_trt") { + option.paddle_infer_option.collect_trt_shape = true; + } + if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") { + option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320}, + {8, 3, 48, 2304}); + } + auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer( + rec_model_file, rec_params_file, FLAGS_rec_label_file, option); + std::string text; + float rec_score; + // Run once at least + model_ppocr_rec.Predict(im, &text, &rec_score); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + std::string text_expect = "上海斯格威铂尔大酒店"; + float res_score_expect = 0.993308; + // Calculate diff between two results. + auto ppocr_rec_text_diff = text.compare(text_expect); + auto ppocr_rec_score_diff = rec_score - res_score_expect; + std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl; + std::cout << "PPOCR Rec score diff: " << abs(ppocr_rec_score_diff) + << std::endl; + BENCHMARK_MODEL(model_ppocr_rec, + model_ppocr_rec.Predict(im, &text, &rec_score)); +#endif + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index 07c36e31e..848851de9 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -16,6 +16,9 @@ #include "macros.h" #include "option.h" +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + int main(int argc, char* argv[]) { #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) // Initialization @@ -24,11 +27,29 @@ int main(int argc, char* argv[]) { return -1; } auto im = cv::imread(FLAGS_image); - auto model_yolov5 = - fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); - fastdeploy::vision::DetectionResult res; + auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option); + vision::DetectionResult res; + // Run once at least + model_yolov5.Predict(im, &res); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + // Save result to -> disk. + std::string det_result_path = "yolov5_result.txt"; + benchmark::ResultManager::SaveDetectionResult(res, det_result_path); + // Load result from <- disk. + vision::DetectionResult res_loaded; + benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path); + // Calculate diff between two results. 
diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index 07c36e31e..848851de9 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -16,6 +16,9 @@ #include "macros.h" #include "option.h" +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + int main(int argc, char* argv[]) { #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) // Initialization @@ -24,11 +27,29 @@ int main(int argc, char* argv[]) { return -1; } auto im = cv::imread(FLAGS_image); - auto model_yolov5 = - fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); - fastdeploy::vision::DetectionResult res; + auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option); + vision::DetectionResult res; + // Run at least once + model_yolov5.Predict(im, &res); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + // Save result to -> disk. + std::string det_result_path = "yolov5_result.txt"; + benchmark::ResultManager::SaveDetectionResult(res, det_result_path); + // Load result from <- disk. + vision::DetectionResult res_loaded; + benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path); + // Calculate diff between two results. + auto det_diff = + benchmark::ResultManager::CalculateDiffStatis(res, res_loaded); + std::cout << "Boxes diff: mean=" << det_diff.boxes.mean + << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min + << std::endl; + std::cout << "Label_ids diff: mean=" << det_diff.labels.mean + << ", max=" << det_diff.labels.max + << ", min=" << det_diff.labels.min << std::endl; BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res)) - auto vis_im = fastdeploy::vision::VisDetection(im, res); + auto vis_im = vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; #endif diff --git a/benchmark/cpp/run_benchmark_ppyolov8.sh b/benchmark/cpp/run_benchmark_ppyolov8.sh deleted file mode 100644 index e69de29bb..000000000 diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc old mode 100644 new mode 100755 index 5af28e4b1..a66bdb6c0 --- a/fastdeploy/benchmark/utils.cc +++ b/fastdeploy/benchmark/utils.cc @@ -474,6 +474,34 @@ bool ResultManager::SaveSegmentationResult( return true; } +bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res, + const std::string& path) { + if (res.empty()) { + FDERROR << "OCRDetResult can not be empty!" << std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // boxes + fs << "boxes" << KEY_VALUE_SEP; + for (int i = 0; i < res.size(); ++i) { + for (int j = 0; j < 8; ++j) { + if ((i == res.size() - 1) && (j == 7)) { + fs << res[i][j]; + } else { + fs << res[i][j] << VALUE_SEP; + } + } + } + fs << "\n"; + fs.close(); + return true; +} + bool ResultManager::LoadDetectionResult(vision::DetectionResult* res, const std::string& path) { if (!CheckFileExists(path)) { @@ -556,6 +584,26 @@ bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res, return true; } +bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res, + const std::string& path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't find file from " << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map<std::string, std::vector<std::string>> data; + // boxes + data = SplitDataLine(lines[0]); + int boxes_num = data.begin()->second.size() / 8; + res->resize(boxes_num); + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + (*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]); + } + } + return true; +} + DetectionDiff ResultManager::CalculateDiffStatis( const vision::DetectionResult& lhs, const vision::DetectionResult& rhs, const float& score_threshold) { @@ -643,6 +691,31 @@ SegmentationDiff ResultManager::CalculateDiffStatis( return diff; } +OCRDetDiff ResultManager::CalculateDiffStatis( + const std::vector<std::array<int, 8>>& lhs, + const std::vector<std::array<int, 8>>& rhs) { + const int boxes_nums = std::min(lhs.size(), rhs.size()); + std::vector<std::array<int, 8>> lhs_sort = lhs; + std::vector<std::array<int, 8>> rhs_sort = rhs; + // lex sort by x(w) & y(h) + vision::utils::LexSortOCRDetResultByXY(&lhs_sort); + vision::utils::LexSortOCRDetResultByXY(&rhs_sort); + // get value diff + const int boxes_num = std::min(lhs_sort.size(), rhs_sort.size()); + std::vector<float> boxes_diff; + for (int i = 0; i < boxes_num; ++i) { + for (int j = 0; j < 8; ++j) { + boxes_diff.push_back(lhs_sort[i][j] - rhs_sort[i][j]); + } + } + + OCRDetDiff diff; + CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(), + &(diff.boxes.mean), &(diff.boxes.max), + &(diff.boxes.min)); + return diff; +} + #endif  // ENABLE_VISION #endif  // ENABLE_BENCHMARK
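Taken together, the helpers above give the benchmarks a simple regression check: persist a known-good result once, then reload and diff it on every run. Note that LoadOCRDetResult parses with std::stoi, so OCR boxes live on disk as integer pixel coordinates. A minimal round-trip sketch against the API exactly as added above (identical inputs should yield zero statistics):

```c++
// Minimal round-trip sketch for the OCR det helpers added above.
#include <array>
#include <vector>
#include "fastdeploy/benchmark/utils.h"

int main() {
  using fastdeploy::benchmark::ResultManager;
  std::vector<std::array<int, 8>> boxes = {
      {10, 20, 110, 20, 110, 60, 10, 60}};  // one 4-point box
  ResultManager::SaveOCRDetResult(boxes, "det_result.txt");
  std::vector<std::array<int, 8>> loaded;
  ResultManager::LoadOCRDetResult(&loaded, "det_result.txt");
  auto diff = ResultManager::CalculateDiffStatis(boxes, loaded);
  // Identical inputs: mean == max == min == 0.
  return diff.boxes.max == 0 ? 0 : 1;
}
```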
diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h index f4d608133..2ad0ae4aa 100755 --- a/fastdeploy/benchmark/utils.h +++ b/fastdeploy/benchmark/utils.h @@ -122,6 +122,10 @@ struct FASTDEPLOY_DECL SegmentationDiff: public BaseDiff { EvalStatis labels; }; +struct FASTDEPLOY_DECL OCRDetDiff: public BaseDiff { + EvalStatis boxes; +}; + #endif  // ENABLE_VISION #endif  // ENABLE_BENCHMARK @@ -148,6 +152,10 @@ struct FASTDEPLOY_DECL ResultManager { const std::string& path); static bool LoadSegmentationResult(vision::SegmentationResult* res, const std::string& path); + static bool SaveOCRDetResult(const std::vector<std::array<int, 8>>& res, + const std::string& path); + static bool LoadOCRDetResult(std::vector<std::array<int, 8>>* res, + const std::string& path); /// Calculate diff value between two basic results. static DetectionDiff CalculateDiffStatis(const vision::DetectionResult& lhs, const vision::DetectionResult& rhs, @@ -157,6 +165,9 @@ struct FASTDEPLOY_DECL ResultManager { static SegmentationDiff CalculateDiffStatis( const vision::SegmentationResult& lhs, const vision::SegmentationResult& rhs); + static OCRDetDiff CalculateDiffStatis( + const std::vector<std::array<int, 8>>& lhs, + const std::vector<std::array<int, 8>>& rhs); #endif  // ENABLE_VISION #endif  // ENABLE_BENCHMARK };
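Since OCRDetDiff carries only box statistics, a consumer typically gates on the extreme values after the lex-sort has aligned the two box lists. A hypothetical CI-style tolerance check (the 1e-2 threshold is arbitrary, chosen here only for illustration):

```c++
// Hypothetical tolerance gate over the OCRDetDiff statistics above.
#include <cmath>
#include "fastdeploy/benchmark/utils.h"

bool CheckOCRDetDiff(const fastdeploy::benchmark::OCRDetDiff& diff,
                     float tol = 1e-2f) {
  // Boxes are lex-sorted before differencing, so max/min are taken over
  // coordinate-wise deltas of aligned boxes.
  return std::fabs(diff.boxes.max) <= tol && std::fabs(diff.boxes.min) <= tol;
}
```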
diff --git a/fastdeploy/vision/classification/contrib/resnet.cc b/fastdeploy/vision/classification/contrib/resnet.cc index 2eed67992..fffbeada6 100644 --- a/fastdeploy/vision/classification/contrib/resnet.cc +++ b/fastdeploy/vision/classification/contrib/resnet.cc @@ -13,23 +13,22 @@ // limitations under the License. #include "fastdeploy/vision/classification/contrib/resnet.h" -#include "fastdeploy/vision/utils/utils.h" #include "fastdeploy/utils/perf.h" +#include "fastdeploy/vision/utils/utils.h" namespace fastdeploy { namespace vision { namespace classification { -ResNet::ResNet(const std::string& model_file, - const std::string& params_file, +ResNet::ResNet(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { // In constructor, the 3 steps below are necessary. // 1. set the Backend 2. set RuntimeOption 3. call Initialize() if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; } @@ -42,7 +41,6 @@ ResNet::ResNet(const std::string& model_file, } bool ResNet::Initialize() { - // In this function, the 3 steps below are necessary. // 1. assign values to the global variables 2. call InitRuntime() @@ -57,14 +55,15 @@ bool ResNet::Initialize() { return true; } - bool ResNet::Preprocess(Mat* mat, FDTensor* output) { + // In this function, the preprocess needs to be implemented according to the + // original repos. + // The result of preprocess has to be saved in an FDTensor variable, because + // the input of Infer() needs to be std::vector<FDTensor>. + // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into the + // FDTensor variable. - -// In this function, the preprocess need be implemented according to the original Repos, -// The result of preprocess has to be saved in FDTensor variable, because the input of Infer() need to be std::vector<FDTensor>. -// 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into FDTensor variable. - - if (mat->Height()!=size[0] || mat->Width()!=size[1]){ + if (mat->Height() != size[0] || mat->Width() != size[1]) { int interp = cv::INTER_LINEAR; Resize::Run(mat, size[1], size[0], -1, -1, interp); } @@ -75,20 +74,23 @@ bool ResNet::Preprocess(Mat* mat, FDTensor* output) { HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } -bool ResNet::Postprocess(FDTensor& infer_result, - ClassifyResult* result, int topk) { - - // In this function, the postprocess need be implemented according to the original Repos, - // Finally the reslut of postprocess should be saved in ClassifyResult variable. - // 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult variable. +bool ResNet::Postprocess(FDTensor& infer_result, ClassifyResult* result, + int topk) { + // In this function, the postprocess needs to be implemented according to + // the original repos. + // Finally the result of postprocess should be saved in the ClassifyResult + // variable. + // 1. Softmax 2. Choose topk labels 3. Put the result into the ClassifyResult + // variable. int num_classes = infer_result.shape[1]; function::Softmax(infer_result, &infer_result); - const float* infer_result_buffer = reinterpret_cast<const float*>(infer_result.Data()); + const float* infer_result_buffer = + reinterpret_cast<const float*>(infer_result.Data()); topk = std::min(num_classes, topk); result->label_ids = utils::TopKIndices(infer_result_buffer, num_classes, topk); @@ -100,8 +102,8 @@ bool ResNet::Postprocess(FDTensor& infer_result, } bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) { - - // In this function, the Preprocess(), Infer(), and Postprocess() are called sequentially. + // In this function, the Preprocess(), Infer(), and Postprocess() are called + // sequentially. Mat mat(*im); std::vector<FDTensor> processed_data(1); @@ -128,7 +130,6 @@ bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) { return true; } - } // namespace classification } // namespace vision } // namespace fastdeploy
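The comment corrected throughout this diff ("reshape to n, c, h, w") describes what the code always did: after HWC2CHW the Mat holds CHW data, so prepending a batch dimension of 1 yields the NCHW layout the inference backends expect. The two equivalent forms used by the models in this patch:

```c++
// After HWC2CHW::Run(mat) the tensor data is laid out as {C, H, W}.
// Prepending a batch dimension of 1 produces the {1, C, H, W} (NCHW)
// input shape the runtimes expect; both forms below appear in this diff
// and are equivalent.
mat->ShareWithTensor(output);
output->ExpandDim(0);                              // {C,H,W} -> {1,C,H,W}
// output->shape.insert(output->shape.begin(), 1);  // older equivalent form
```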
diff --git a/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc b/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc index e252ba0ee..35b3e17bb 100644 --- a/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc +++ b/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc @@ -20,18 +20,19 @@ namespace vision { namespace classification { YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() { - size_ = {224, 224}; //{h,w} + size_ = {224, 224};  //{h,w} } -bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv5ClsPreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; // process after image load double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()), - static_cast<float>(mat->Width())); + static_cast<float>(mat->Width())); // yolov5cls's preprocess steps // 1. CenterCrop @@ -54,20 +55,22 @@ bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv5ClsPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv5ClsPreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc b/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc index f4ff11e8f..7b8fcc399 100644 --- a/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc @@ -20,26 +20,27 @@ namespace vision { namespace detection { FastestDetPreprocessor::FastestDetPreprocessor() { - size_ = {352, 352}; //{h,w} + size_ = {352, 352};  //{h,w} } -bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool FastestDetPreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; // process after image load double ratio = (size_[0] * 1.0) / std::max(static_cast<float>(mat->Height()), - static_cast<float>(mat->Width())); + static_cast<float>(mat->Width())); // fastestdet's preprocess steps // 1. resize // 2. convert_and_permute(swap_rb=false) - Resize::Run(mat, size_[0], size_[1]); //resize + Resize::Run(mat, size_[0], size_[1]);  // resize std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; std::vector<float> beta = {0.0f, 0.0f, 0.0f}; -//convert to float and HWC2CHW + // convert to float and HWC2CHW ConvertAndPermute::Run(mat, alpha, beta, false); // Record output shape of preprocessed image @@ -47,20 +48,22 @@ bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool FastestDetPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool FastestDetPreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image."
<< std::endl; diff --git a/fastdeploy/vision/detection/contrib/nanodet_plus.cc b/fastdeploy/vision/detection/contrib/nanodet_plus.cc index 2babae49c..0b89cdbe2 100644 --- a/fastdeploy/vision/detection/contrib/nanodet_plus.cc +++ b/fastdeploy/vision/detection/contrib/nanodet_plus.cc @@ -117,8 +117,8 @@ NanoDetPlus::NanoDetPlus(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -182,7 +182,7 @@ bool NanoDetPlus::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/fastdeploy/vision/detection/contrib/scaledyolov4.cc old mode 100755 new mode 100644 index 8678ea181..88c34352b --- a/fastdeploy/vision/detection/contrib/scaledyolov4.cc +++ b/fastdeploy/vision/detection/contrib/scaledyolov4.cc @@ -62,8 +62,8 @@ ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; @@ -144,7 +144,7 @@ bool ScaledYOLOv4::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/yolor.cc b/fastdeploy/vision/detection/contrib/yolor.cc old mode 100755 new mode 100644 index dd4ef728a..cad66eb08 --- a/fastdeploy/vision/detection/contrib/yolor.cc +++ b/fastdeploy/vision/detection/contrib/yolor.cc @@ -61,8 +61,8 @@ YOLOR::YOLOR(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; @@ -142,7 +142,7 @@ bool YOLOR::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc old mode 100755 new mode 100644 index 846e25131..658987b75 --- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc @@ -64,8 +64,9 @@ void 
YOLOv5Preprocessor::LetterBox(FDMat* mat) { } } -bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv5Preprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -82,20 +83,22 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv5Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv5Preprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.cc b/fastdeploy/vision/detection/contrib/yolov5lite.cc index be4116eed..8d8f325dc 100644 --- a/fastdeploy/vision/detection/contrib/yolov5lite.cc +++ b/fastdeploy/vision/detection/contrib/yolov5lite.cc @@ -195,7 +195,7 @@ bool YOLOv5Lite::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -253,7 +253,7 @@ bool YOLOv5Lite::CudaPreprocess( output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, input_tensor_cuda_buffer_device_); output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; #else FDERROR << "CUDA src code was not enabled." << std::endl;
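The reformatted Run() methods in this and the following preprocessors all share one batching pattern: preprocess each image into its own FDTensor, then concatenate along axis 0 into outputs[0]. A condensed sketch of that pattern, assuming fastdeploy::function::Concat concatenates FDTensors along the given axis (the concatenation step itself falls outside the hunks shown here):

```c++
// Condensed sketch of the batching pattern in the preprocessor Run()
// methods. Assumes function::Concat joins FDTensors along axis 0 (batch);
// the per-image Preprocess() step is the one shown in the hunks above.
#include <utility>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"   // fastdeploy::FDTensor
#include "fastdeploy/function/concat.h"  // function::Concat (assumed path)

bool BatchTensors(std::vector<fastdeploy::FDTensor>* tensors,
                  std::vector<fastdeploy::FDTensor>* outputs) {
  outputs->resize(1);
  if (tensors->size() == 1) {
    (*outputs)[0] = std::move((*tensors)[0]);  // single image: no concat
  } else {
    fastdeploy::function::Concat(*tensors, &((*outputs)[0]), 0);
  }
  return true;
}
```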
diff --git a/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc index b880ed337..e5bd82630 100644 --- a/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc @@ -64,8 +64,9 @@ void YOLOv5SegPreprocessor::LetterBox(FDMat* mat) { } } -bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv5SegPreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -82,20 +83,22 @@ bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv5SegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv5SegPreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov6.cc b/fastdeploy/vision/detection/contrib/yolov6.cc index cae9ce3a6..bf3368242 100644 --- a/fastdeploy/vision/detection/contrib/yolov6.cc +++ b/fastdeploy/vision/detection/contrib/yolov6.cc @@ -168,7 +168,7 @@ bool YOLOv6::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -226,7 +226,7 @@ bool YOLOv6::CudaPreprocess( output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, input_tensor_cuda_buffer_device_); output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; #else FDERROR << "CUDA src code was not enabled."
<< std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc old mode 100755 new mode 100644 index 91e22f32b..3374e16bb --- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc @@ -64,8 +64,9 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) { } } -bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { +bool YOLOv7Preprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -82,20 +83,22 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } -bool YOLOv7Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool YOLOv7Preprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0." + << std::endl; return false; } ims_info->resize(images->size()); outputs->resize(1); // Concat all the preprocessed data to a batch tensor - std::vector<FDTensor> tensors(images->size()); + std::vector<FDTensor> tensors(images->size()); for (size_t i = 0; i < images->size(); ++i) { if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) { FDERROR << "Failed to preprocess input image." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc old mode 100755 new mode 100644 index daf4ee66b..af7ff0e5c --- a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc @@ -137,7 +137,7 @@ bool YOLOv7End2EndORT::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -235,7 +235,8 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result, return false; } - if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) { + if (!Postprocess(reused_output_tensors_[0], result, im_info, + conf_threshold)) { FDERROR << "Failed to post process."
<< std::endl; return false; } diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc index 49961df65..e969771a2 100644 --- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc +++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc @@ -169,7 +169,7 @@ bool YOLOv7End2EndTRT::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } @@ -227,7 +227,7 @@ bool YOLOv7End2EndTRT::CudaPreprocess( output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32, input_tensor_cuda_buffer_device_); output->device = Device::GPU; - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; #else FDERROR << "CUDA src code was not enabled." << std::endl; diff --git a/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc index 1c6d9f62c..ebb8b28cd 100644 --- a/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc +++ b/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc @@ -83,7 +83,7 @@ bool YOLOv8Preprocessor::Preprocess( static_cast(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/detection/contrib/yolox.cc b/fastdeploy/vision/detection/contrib/yolox.cc index c1c071826..e7d931c42 100755 --- a/fastdeploy/vision/detection/contrib/yolox.cc +++ b/fastdeploy/vision/detection/contrib/yolox.cc @@ -129,7 +129,7 @@ bool YOLOX::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc b/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc index f7b689575..0b914fb05 100644 --- a/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc +++ b/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc @@ -70,7 +70,7 @@ bool FaceLandmark1000::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facealign/contrib/pfld.cc b/fastdeploy/vision/facealign/contrib/pfld.cc index 5978f10b7..d57427090 100644 --- a/fastdeploy/vision/facealign/contrib/pfld.cc +++ b/fastdeploy/vision/facealign/contrib/pfld.cc @@ -22,13 +22,12 @@ namespace vision { namespace facealign { -PFLD::PFLD(const std::string& model_file, - const std::string& params_file, +PFLD::PFLD(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; 
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -71,12 +70,13 @@ bool PFLD::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } -bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result, - const std::map<std::string, std::array<float, 2>>& im_info) { +bool PFLD::Postprocess( + FDTensor& infer_result, FaceAlignmentResult* result, + const std::map<std::string, std::array<float, 2>>& im_info) { FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); if (infer_result.dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data." << std::endl; @@ -84,8 +84,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result, } auto iter_in = im_info.find("input_shape"); - FDASSERT(iter_in != im_info.end(), - "Cannot find input_shape from im_info."); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); int in_h = iter_in->second[0]; int in_w = iter_in->second[1]; @@ -97,8 +96,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result, x = std::min(std::max(0.f, x), 1.0f); y = std::min(std::max(0.f, y), 1.0f); // decode landmarks (default 106 landmarks) - result->landmarks.emplace_back( - std::array<float, 2>{x * in_w, y * in_h}); + result->landmarks.emplace_back(std::array<float, 2>{x * in_w, y * in_h}); } return true; diff --git a/fastdeploy/vision/facealign/contrib/pipnet.cc b/fastdeploy/vision/facealign/contrib/pipnet.cc index 27ec35c0d..3af16fa91 100644 --- a/fastdeploy/vision/facealign/contrib/pipnet.cc +++ b/fastdeploy/vision/facealign/contrib/pipnet.cc @@ -632,7 +632,7 @@ bool PIPNet::Preprocess(Mat* mat, FDTensor* output, HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/retinaface.cc b/fastdeploy/vision/facedet/contrib/retinaface.cc index 6f38f5636..cd5f93ab9 100644 --- a/fastdeploy/vision/facedet/contrib/retinaface.cc +++ b/fastdeploy/vision/facedet/contrib/retinaface.cc @@ -81,8 +81,8 @@ RetinaFace::RetinaFace(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -145,7 +145,7 @@ bool RetinaFace::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/ultraface.cc b/fastdeploy/vision/facedet/contrib/ultraface.cc index e7dd99dc4..cf398b2e4 100644 --- a/fastdeploy/vision/facedet/contrib/ultraface.cc +++ b/fastdeploy/vision/facedet/contrib/ultraface.cc @@ -27,7 +27,7 @@ UltraFace::UltraFace(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { -
valid_cpu_backends = {Backend::ORT}; + valid_cpu_backends = {Backend::ORT}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; @@ -90,7 +90,7 @@ bool UltraFace::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/yolov5face.cc b/fastdeploy/vision/facedet/contrib/yolov5face.cc index d508e905a..3fb309bea 100644 --- a/fastdeploy/vision/facedet/contrib/yolov5face.cc +++ b/fastdeploy/vision/facedet/contrib/yolov5face.cc @@ -64,8 +64,8 @@ YOLOv5Face::YOLOv5Face(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -115,11 +115,11 @@ bool YOLOv5Face::Preprocess( // process after image load float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()), size[0] * 1.0f / static_cast<float>(mat->Width())); -#ifndef __ANDROID__ - // Because of the low CPU performance on the Android device, - // we decided to hide this extra resize. It won't make much +#ifndef __ANDROID__ + // Because of the low CPU performance on the Android device, + // we decided to hide this extra resize. It won't make much difference to the final result. - if (std::fabs(ratio - 1.0f) > 1e-06) { + if (std::fabs(ratio - 1.0f) > 1e-06) { int interp = cv::INTER_LINEAR; if (ratio > 1.0) { interp = cv::INTER_LINEAR; @@ -128,7 +128,7 @@ bool YOLOv5Face::Preprocess( int resize_w = int(round(static_cast<float>(mat->Width()) * ratio)); Resize::Run(mat, resize_w, resize_h, -1, -1, interp); } -#endif +#endif // yolov5face's preprocess steps // 1. letterbox // 2. BGR->RGB @@ -149,9 +149,9 @@ bool YOLOv5Face::Preprocess( HWC2CHW::Run(mat); Cast::Run(mat, "float"); - + mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc b/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc index ad5dd7e33..7af63f585 100644 --- a/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc +++ b/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc @@ -32,10 +32,12 @@ Yolov7FacePreprocessor::Yolov7FacePreprocessor() { max_wh_ = 7680.0; } -bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, - std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { +bool Yolov7FacePreprocessor::Run( + std::vector<FDMat>* images, std::vector<FDTensor>* outputs, + std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) { if (images->size() == 0) { - FDERROR << "The size of input images should be greater than 0." << std::endl; + FDERROR << "The size of input images should be greater than 0."
+ << std::endl; return false; } ims_info->resize(images->size()); @@ -56,8 +58,9 @@ bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, -bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info){ +bool Yolov7FacePreprocessor::Preprocess( + FDMat* mat, FDTensor* output, + std::map<std::string, std::array<float, 2>>* im_info) { // Record the shape of image and the shape of preprocessed image (*im_info)["input_shape"] = {static_cast<float>(mat->Height()), static_cast<float>(mat->Width())}; @@ -75,13 +78,13 @@ bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output, static_cast<float>(mat->Width())}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } void Yolov7FacePreprocessor::LetterBox(FDMat* mat) { float scale = - std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); + std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width()); if (!is_scale_up_) { scale = std::min(scale, 1.0f); } diff --git a/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc b/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc old mode 100755 new mode 100644 index 8e8f95950..cb0d90310 --- a/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc +++ b/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc @@ -26,8 +26,7 @@ AdaFacePreprocessor::AdaFacePreprocessor() { permute_ = true; } -bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) { - +bool AdaFacePreprocessor::Preprocess(FDMat* mat, FDTensor* output) { // face recognition model's preprocess steps in insightface // reference: insightface/recognition/arcface_torch/inference.py // 1. Resize @@ -48,14 +47,15 @@ bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) { Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } bool AdaFacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) { if (images->empty()) { - FDERROR << "The size of input images should be greater than 0."
+ << std::endl; return false; } FDASSERT(images->size() == 1, "Only support batch = 1 now."); diff --git a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc index 398a7016e..e7f55cf65 100644 --- a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc +++ b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc @@ -50,7 +50,7 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat* mat, } mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } diff --git a/fastdeploy/vision/headpose/contrib/fsanet.cc b/fastdeploy/vision/headpose/contrib/fsanet.cc index 59f25ac5a..c22909134 100644 --- a/fastdeploy/vision/headpose/contrib/fsanet.cc +++ b/fastdeploy/vision/headpose/contrib/fsanet.cc @@ -22,13 +22,12 @@ namespace vision { namespace headpose { -FSANet::FSANet(const std::string& model_file, - const std::string& params_file, +FSANet::FSANet(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::OPENVINO, Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; @@ -52,7 +51,7 @@ bool FSANet::Initialize() { } bool FSANet::Preprocess(Mat* mat, FDTensor* output, - std::map<std::string, std::array<float, 2>>* im_info) { + std::map<std::string, std::array<float, 2>>* im_info) { // Resize int resize_w = size[0]; int resize_h = size[1]; @@ -62,7 +61,8 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output, // Normalize std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; - std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, -127.5f / 128.0f}; + std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, + -127.5f / 128.0f}; Convert::Run(mat, alpha, beta); // Record output shape of preprocessed image @@ -72,12 +72,13 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output, Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } -bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result, - const std::map<std::string, std::array<float, 2>>& im_info) { +bool FSANet::Postprocess( + FDTensor& infer_result, HeadPoseResult* result, + const std::map<std::string, std::array<float, 2>>& im_info) { FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now."); if (infer_result.dtype != FDDataType::FP32) { FDERROR << "Only support post process with float32 data."
<< std::endl; @@ -85,8 +86,7 @@ bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result, } auto iter_in = im_info.find("input_shape"); - FDASSERT(iter_in != im_info.end(), - "Cannot find input_shape from im_info."); + FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info."); int in_h = iter_in->second[0]; int in_w = iter_in->second[1]; diff --git a/fastdeploy/vision/matting/contrib/modnet.cc b/fastdeploy/vision/matting/contrib/modnet.cc index c3a89733d..05141a926 100644 --- a/fastdeploy/vision/matting/contrib/modnet.cc +++ b/fastdeploy/vision/matting/contrib/modnet.cc @@ -77,7 +77,7 @@ bool MODNet::Preprocess(Mat* mat, FDTensor* output, Cast::Run(mat, "float"); mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c + output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w return true; } @@ -106,8 +106,8 @@ bool MODNet::Postprocess( float* alpha_ptr = static_cast<float*>(alpha_tensor.Data()); // cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); // Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. - Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, - alpha_ptr); // ref-only, zero copy. + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr);  // ref-only, zero copy. if ((out_h != ipt_h) || (out_w != ipt_w)) { Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1); } diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc old mode 100755 new mode 100644 index 258205cf8..2b16aab83 --- a/fastdeploy/vision/matting/contrib/rvm.cc +++ b/fastdeploy/vision/matting/contrib/rvm.cc @@ -74,7 +74,7 @@ bool RobustVideoMatting::Preprocess( (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; mat->ShareWithTensor(output); - output->ExpandDim(0); // reshape to n, h, w, c + output->ExpandDim(0);  // reshape to n, c, h, w return true; } @@ -118,16 +118,16 @@ bool RobustVideoMatting::Postprocess( // for alpha float* alpha_ptr = static_cast<float*>(alpha.Data()); - Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, - alpha_ptr); // ref-only, zero copy. + Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + alpha_ptr);  // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { Resize::Run(&alpha_resized, in_w, in_h, -1, -1); } // for foreground float* fgr_ptr = static_cast<float*>(fgr.Data()); - Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, - fgr_ptr); // ref-only, zero copy. + Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32, + fgr_ptr);  // ref-only, zero copy. if ((out_h != in_h) || (out_w != in_w)) { Resize::Run(&fgr_resized, in_w, in_h, -1, -1); } diff --git a/fastdeploy/vision/utils/sort_det_res.cc b/fastdeploy/vision/utils/sort_det_res.cc old mode 100644 new mode 100755 index dd33478a3..d0813a260 --- a/fastdeploy/vision/utils/sort_det_res.cc +++ b/fastdeploy/vision/utils/sort_det_res.cc @@ -77,27 +77,42 @@ void SortDetectionResult(DetectionResult* result) { MergeSort(result, low, high); } -bool LexSortByXYCompare(const std::array<float, 4>& box_a, - const std::array<float, 4>& box_b) { +template <typename T> +bool LexSortByXYCompare(const std::array<T, 4>& box_a, + const std::array<T, 4>& box_b) { // WARN: The status should be false if (a==b).
// https://blog.csdn.net/xxxwrq/article/details/83080640 - auto is_equal = [](const float& a, const float& b) -> bool { + auto is_equal = [](const T& a, const T& b) -> bool { return std::abs(a - b) < 1e-6f; }; - const float& x0_a = box_a[0]; - const float& y0_a = box_a[1]; - const float& x0_b = box_b[0]; - const float& y0_b = box_b[1]; + const T& x0_a = box_a[0]; + const T& y0_a = box_a[1]; + const T& x0_b = box_b[0]; + const T& y0_b = box_b[1]; if (is_equal(x0_a, x0_b)) { return is_equal(y0_a, y0_b) ? false : y0_a > y0_b; } return x0_a > x0_b; } +// Only for int dtype +template <> +bool LexSortByXYCompare(const std::array<int, 4>& box_a, + const std::array<int, 4>& box_b) { + const int& x0_a = box_a[0]; + const int& y0_a = box_a[1]; + const int& x0_b = box_b[0]; + const int& y0_b = box_b[1]; + if (x0_a == x0_b) { + return y0_a == y0_b ? false : y0_a > y0_b; + } + return x0_a > x0_b; +} + void ReorderDetectionResultByIndices(DetectionResult* result, const std::vector<size_t>& indices) { // reorder boxes, scores, label_ids, masks - DetectionResult backup = (*result); // move + DetectionResult backup = (*result); const bool contain_masks = backup.contain_masks; const int boxes_num = backup.boxes.size(); result->Clear(); @@ -122,7 +137,7 @@ void ReorderDetectionResultByIndices(DetectionResult* result, } void LexSortDetectionResultByXY(DetectionResult* result) { - if (result->boxes.size() == 0) { + if (result->boxes.empty()) { return; } std::vector<size_t> indices; @@ -138,6 +153,35 @@ void LexSortDetectionResultByXY(DetectionResult* result) { ReorderDetectionResultByIndices(result, indices); } +void LexSortOCRDetResultByXY(std::vector<std::array<int, 8>>* result) { + if (result->empty()) { + return; + } + std::vector<size_t> indices; + indices.resize(result->size()); + std::vector<std::array<int, 4>> boxes; + boxes.resize(result->size()); + for (size_t i = 0; i < result->size(); ++i) { + indices[i] = i; + // 4 points to 2 points for LexSort + boxes[i] = {(*result)[i][0], (*result)[i][1], (*result)[i][6], + (*result)[i][7]}; + } + // lex sort by x(w) then y(h) + std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) { + return LexSortByXYCompare(boxes[a], boxes[b]); + }); + // reorder boxes + std::vector<std::array<int, 8>> backup = (*result); + const int boxes_num = backup.size(); + result->clear(); + result->resize(boxes_num); + // boxes + for (int i = 0; i < boxes_num; ++i) { + (*result)[i] = backup[indices[i]]; + } +} + } // namespace utils } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/utils/utils.h b/fastdeploy/vision/utils/utils.h old mode 100644 new mode 100755 index bca781973..1f8f21b48 --- a/fastdeploy/vision/utils/utils.h +++ b/fastdeploy/vision/utils/utils.h @@ -67,8 +67,11 @@ void NMS(FaceDetectionResult* result, float iou_threshold = 0.5); /// Sort DetectionResult/FaceDetectionResult by score FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result); FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result); -/// Lex Sort DetectionResult/FaceDetectionResult by x(w) & y(h) axis +/// Lex Sort DetectionResult by x(w) & y(h) axis FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result); +/// Lex Sort OCRDet Result by x(w) & y(h) axis +FASTDEPLOY_DECL void LexSortOCRDetResultByXY( + std::vector<std::array<int, 8>>* result); /// L2 Norm / cosine similarity (for face recognition, ...) FASTDEPLOY_DECL std::vector
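For reference, the ordering LexSortByXYCompare imposes can be reproduced standalone: boxes compare by x first, then by y, both descending, and the comparator returns false on ties so it is safe for std::sort. A self-contained example applying the same logic to the reduced 2-point boxes (values hypothetical):

```c++
// Standalone illustration of the lexicographic box ordering used above.
#include <algorithm>
#include <array>
#include <iostream>
#include <vector>

int main() {
  // Reduced boxes {x0, y0, x3, y3}, as built by LexSortOCRDetResultByXY
  // from the 4-point OCR boxes; only x0/y0 drive the comparison.
  std::vector<std::array<int, 4>> boxes = {
      {10, 20, 50, 60}, {10, 5, 40, 30}, {30, 5, 70, 40}};
  std::sort(boxes.begin(), boxes.end(),
            [](const std::array<int, 4>& a, const std::array<int, 4>& b) {
              if (a[0] == b[0]) {
                return a[1] == b[1] ? false : a[1] > b[1];  // then y, desc
              }
              return a[0] > b[0];  // x first, descending
            });
  for (const auto& b : boxes) {
    std::cout << b[0] << "," << b[1] << "\n";  // 30,5  10,20  10,5
  }
  return 0;
}
```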