From ade27d29cbb5ada48b551b76be1512200e93b094 Mon Sep 17 00:00:00 2001 From: DefTruth <31974251+DefTruth@users.noreply.github.com> Date: Mon, 17 Jul 2023 23:06:51 +0800 Subject: [PATCH] [Sync][Internal] sync some internal features of paddle3d inference (#2118) * [Sync][Internal] sync some internal codes * [Sync][Internal] sync some internal features of paddle3d inference * [Sync][Internal] sync some internal features of paddle3d inference --- benchmark/paddlex/CMakeLists.txt | 6 +- benchmark/paddlex/README.md | 3 +- benchmark/paddlex/benchmark_gpu.sh | 10 +- benchmark/paddlex/benchmark_gpu_trt.sh | 8 +- ..._pp3d_cadnn.cc => benchmark_pp3d_caddn.cc} | 29 ++-- benchmark/paddlex/benchmark_x86.sh | 8 +- benchmark/paddlex/get_models.sh | 2 +- .../vision/perception/paddle3d/petr/petr.cc | 37 +++-- .../perception/paddle3d/petr/preprocessor.cc | 132 ++++-------------- .../perception/paddle3d/petr/preprocessor.h | 29 +++- 10 files changed, 107 insertions(+), 157 deletions(-) rename benchmark/paddlex/{benchmark_pp3d_cadnn.cc => benchmark_pp3d_caddn.cc} (78%) mode change 100755 => 100644 fastdeploy/vision/perception/paddle3d/petr/petr.cc diff --git a/benchmark/paddlex/CMakeLists.txt b/benchmark/paddlex/CMakeLists.txt index 90c2f2e74..d79ac18e1 100755 --- a/benchmark/paddlex/CMakeLists.txt +++ b/benchmark/paddlex/CMakeLists.txt @@ -21,7 +21,7 @@ add_executable(benchmark_structurev2_table ${PROJECT_SOURCE_DIR}/benchmark_struc add_executable(benchmark_structurev2_layout ${PROJECT_SOURCE_DIR}/benchmark_structurev2_layout.cc) add_executable(benchmark_ppshituv2_rec ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_rec.cc) add_executable(benchmark_ppshituv2_det ${PROJECT_SOURCE_DIR}/benchmark_ppshituv2_det.cc) -add_executable(benchmark_pp3d_cadnn ${PROJECT_SOURCE_DIR}/benchmark_pp3d_cadnn.cc) +add_executable(benchmark_pp3d_caddn ${PROJECT_SOURCE_DIR}/benchmark_pp3d_caddn.cc) add_executable(benchmark_pp3d_centerpoint ${PROJECT_SOURCE_DIR}/benchmark_pp3d_centerpoint.cc) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) @@ -35,7 +35,7 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_structurev2_layout ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppshituv2_det ${FASTDEPLOY_LIBS} gflags pthread) - target_link_libraries(benchmark_pp3d_cadnn ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_pp3d_caddn ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_pp3d_centerpoint ${FASTDEPLOY_LIBS} gflags pthread) else() target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags) @@ -48,7 +48,7 @@ else() target_link_libraries(benchmark_structurev2_layout ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppshituv2_rec ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppshituv2_det ${FASTDEPLOY_LIBS} gflags) - target_link_libraries(benchmark_pp3d_cadnn ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_pp3d_caddn ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_pp3d_centerpoint ${FASTDEPLOY_LIBS} gflags) endif() # only for Android ADB test diff --git a/benchmark/paddlex/README.md b/benchmark/paddlex/README.md index 9b865cdbc..b73495e57 100755 --- a/benchmark/paddlex/README.md +++ b/benchmark/paddlex/README.md @@ -131,10 +131,11 @@ tar -zxvf MobileNetV3_small_x1_0.tgz ## 4. 
各个硬件上的一键运行脚本 在准备好相关的环境配置和SDK后,可以使用本目录提供的脚本一键运行后的benchmark数据。 -- 获取模型和资源文件 +- 获取模型和资源文件 ```bash ./get_models.sh ``` + - 运行benchmark脚本 ```bash # x86 CPU Paddle backend fp32 diff --git a/benchmark/paddlex/benchmark_gpu.sh b/benchmark/paddlex/benchmark_gpu.sh index 92b4b33c4..bf80e3944 100755 --- a/benchmark/paddlex/benchmark_gpu.sh +++ b/benchmark/paddlex/benchmark_gpu.sh @@ -29,7 +29,7 @@ fi # PaddleSeg ./benchmark_ppseg --model OCRNet_HRNetW48 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH -./benchmark_ppseg --model PP-LiteSeg-STDC1 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH +./benchmark_ppseg --model PP-LiteSeg-STDC1 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH ./benchmark_ppseg --model SegFormer-B0 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH ./benchmark_ppseg --model PP-MobileSeg-Base --image ppseg_ade_val_512x512.png --config_path $CONFIG_PATH @@ -49,9 +49,9 @@ fi ./benchmark --model PP-Structurev2-vi-layoutxlm --shapes "1,512:1,512,4:1,512:1,512" --trt_shapes "1,512:1,512:1,512:1,512,4:1,512,4:1,512,4:1,512:1,512:1,512:1,512:1,512:1,512" --names "x_0:x_1:x_2:x_3" --dtypes "INT64:INT64:INT64:INT64" --disable_mkldnn --custom_tensor_value 0.2 --config_path $CONFIG_PATH # Paddle3D -./benchmark --model PETRv1_v99 --shapes "1,6,3,320,800:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH -./benchmark --model PETRv2_v99 --shapes "1,12,3,320,800:1,12,4,4:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH -./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH -./benchmark_pp3d_cadnn --model CADNN_OCRNet-HRNetW18 --image paddle3d_cadnn_kitti_000780.png --config_path $CONFIG_PATH +./benchmark --model PETRv1_v99 --shapes "1,6,3,320,800:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH +./benchmark --model PETRv2_v99 --shapes "1,12,3,320,800:1,12,4,4:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH +./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH +./benchmark_pp3d_caddn --model CADDN_OCRNet-HRNetW18 --image paddle3d_caddn_kitti_000780.png --config_path $CONFIG_PATH set +x diff --git a/benchmark/paddlex/benchmark_gpu_trt.sh b/benchmark/paddlex/benchmark_gpu_trt.sh index 4d449454c..24b9fb952 100755 --- a/benchmark/paddlex/benchmark_gpu_trt.sh +++ b/benchmark/paddlex/benchmark_gpu_trt.sh @@ -51,9 +51,9 @@ fi ./benchmark --model PP-Structurev2-vi-layoutxlm --shapes "1,512:1,512,4:1,512:1,512" --trt_shapes "1,512:1,512:1,512:1,512,4:1,512,4:1,512,4:1,512:1,512:1,512:1,512:1,512:1,512" --names "x_0:x_1:x_2:x_3" --dtypes "INT64:INT64:INT64:INT64" --disable_mkldnn --custom_tensor_value 0.2 --collect_trt_shape_by_custom_tensor_value --collect_trt_shape_by_device --config_path $CONFIG_PATH # Paddle3D -./benchmark --model PETRv1_v99 --shapes "1,6,3,320,800:1,6,4,4" --trt_shapes "1,6,3,320,800:1,6,3,320,800:1,6,3,320,800:1,6,4,4:1,6,4,4:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH -./benchmark --model PETRv2_v99 --shapes "1,12,3,320,800:1,12,4,4:1,12" --trt_shapes 
"1,12,3,320,800:1,12,3,320,800:1,12,3,320,800:1,12,4,4:1,12,4,4:1,12,4,4:1,12:1,12:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH -./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH -./benchmark_pp3d_cadnn --model CADNN_OCRNet-HRNetW18 --image paddle3d_cadnn_kitti_000780.png --config_path $CONFIG_PATH +./benchmark --model PETRv1_v99 --shapes "1,6,3,320,800:1,6,4,4" --trt_shapes "1,6,3,320,800:1,6,3,320,800:1,6,3,320,800:1,6,4,4:1,6,4,4:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH +./benchmark --model PETRv2_v99 --shapes "1,12,3,320,800:1,12,4,4:1,12" --trt_shapes "1,12,3,320,800:1,12,3,320,800:1,12,3,320,800:1,12,4,4:1,12,4,4:1,12,4,4:1,12:1,12:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --config_path $CONFIG_PATH +./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH +./benchmark_pp3d_caddn --model CADDN_OCRNet-HRNetW18 --image paddle3d_caddn_kitti_000780.png --config_path $CONFIG_PATH set +x diff --git a/benchmark/paddlex/benchmark_pp3d_cadnn.cc b/benchmark/paddlex/benchmark_pp3d_caddn.cc similarity index 78% rename from benchmark/paddlex/benchmark_pp3d_cadnn.cc rename to benchmark/paddlex/benchmark_pp3d_caddn.cc index d16787bd4..f014bfcce 100644 --- a/benchmark/paddlex/benchmark_pp3d_cadnn.cc +++ b/benchmark/paddlex/benchmark_pp3d_caddn.cc @@ -56,24 +56,25 @@ int main(int argc, char* argv[]) { } if (config_info["backend"] == "paddle_trt" || config_info["backend"] == "trt") { - // use custom data to perform collect shapes. - option.trt_option.SetShape("images", {1, 3, 375, 1242}, - {1, 3, 375, 1242}, {1, 3, 375, 1242}); - option.trt_option.SetShape("trans_lidar_to_cam", {1, 4, 4}, - {1, 4, 4}, {1, 4, 4}); - option.trt_option.SetShape("trans_cam_to_img", {1, 3, 4}, - {1, 3, 4}, {1, 3, 4}); + // use custom data to perform collect shapes. 
+ option.trt_option.SetShape("images", {1, 3, 375, 1242}, {1, 3, 375, 1242}, + {1, 3, 375, 1242}); + option.trt_option.SetShape("trans_lidar_to_cam", {1, 4, 4}, {1, 4, 4}, + {1, 4, 4}); + option.trt_option.SetShape("trans_cam_to_img", {1, 3, 4}, {1, 3, 4}, + {1, 3, 4}); std::vector image_data; - image_data.assign(im.data, im.data + 1*3*375*1242); - option.trt_option.SetInputData("trans_lidar_to_cam", lidar_data); - option.trt_option.SetInputData("trans_cam_to_img", cam_data); - option.trt_option.SetInputData("images", image_data); + image_data.assign(im.data, im.data + 1 * 3 * 375 * 1242); + option.trt_option.SetInputData("trans_lidar_to_cam", lidar_data); + option.trt_option.SetInputData("trans_cam_to_img", cam_data); + option.trt_option.SetInputData("images", image_data); } - auto model_cadnn = vision::perception::Caddn( - model_file, params_file, "", option, model_format); + auto model_cadnn = vision::perception::Caddn(model_file, params_file, "", + option, model_format); vision::PerceptionResult res; // Run profiling - BENCHMARK_MODEL(model_cadnn, model_cadnn.Predict(im, cam_data, lidar_data, &res)) + BENCHMARK_MODEL(model_cadnn, + model_cadnn.Predict(im, cam_data, lidar_data, &res)) std::cout << res.Str() << std::endl; #endif diff --git a/benchmark/paddlex/benchmark_x86.sh b/benchmark/paddlex/benchmark_x86.sh index ca668bb60..e2c7eba70 100755 --- a/benchmark/paddlex/benchmark_x86.sh +++ b/benchmark/paddlex/benchmark_x86.sh @@ -29,7 +29,7 @@ fi # PaddleSeg ./benchmark_ppseg --model OCRNet_HRNetW48 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH -./benchmark_ppseg --model PP-LiteSeg-STDC1 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH +./benchmark_ppseg --model PP-LiteSeg-STDC1 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH ./benchmark_ppseg --model SegFormer-B0 --image ppseg_cityscapes_demo_512x512.png --config_path $CONFIG_PATH ./benchmark_ppseg --model PP-MobileSeg-Base --image ppseg_ade_val_512x512.png --config_path $CONFIG_PATH @@ -51,7 +51,7 @@ fi # Paddle3D ./benchmark --model PETRv1_v99 --config_path $CONFIG_PATH --shapes "1,6,3,320,800:1,6,4,4" --names "images:img2lidars" --dtypes "FP32:FP32" --disable_mkldnn --warmup 5 --repeat 20 ./benchmark --model PETRv2_v99 --config_path $CONFIG_PATH --shapes "1,12,3,320,800:1,12,4,4:1,12" --names "images:img2lidars:timestamps" --dtypes "FP32:FP32:FP32" --disable_mkldnn --warmup 5 --repeat 20 -./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH -./benchmark_pp3d_cadnn --model CADNN_OCRNet-HRNetW18 --image paddle3d_cadnn_kitti_000780.png --config_path $CONFIG_PATH +./benchmark_pp3d_centerpoint --model CenterPoint-Pillars-02Voxel --image paddle3d_centerpoint_n008_LIDAR_TOP__1533151603547590.pcd.bin --config_path $CONFIG_PATH +./benchmark_pp3d_caddn --model CADDN_OCRNet-HRNetW18 --image paddle3d_caddn_kitti_000780.png --config_path $CONFIG_PATH -set +x \ No newline at end of file +set +x diff --git a/benchmark/paddlex/get_models.sh b/benchmark/paddlex/get_models.sh index 48f95c1b5..d2cfa4a77 100755 --- a/benchmark/paddlex/get_models.sh +++ b/benchmark/paddlex/get_models.sh @@ -68,7 +68,7 @@ download PP-Structurev2-SLANet.tgz download PP-Structurev2-vi-layoutxlm.tgz # Paddle3D -download CADNN_OCRNet-HRNetW18.tgz +download CADDN_OCRNet-HRNetW18.tgz download CenterPoint-Pillars-02Voxel.tgz download PETRv1_v99.tgz download PETRv2_v99.tgz diff --git 
a/fastdeploy/vision/perception/paddle3d/petr/petr.cc b/fastdeploy/vision/perception/paddle3d/petr/petr.cc old mode 100755 new mode 100644 index 0a08b6404..770fee797 --- a/fastdeploy/vision/perception/paddle3d/petr/petr.cc +++ b/fastdeploy/vision/perception/paddle3d/petr/petr.cc @@ -19,8 +19,8 @@ namespace vision { namespace perception { Petr::Petr(const std::string& model_file, const std::string& params_file, - const std::string& config_file, const RuntimeOption& custom_option, - const ModelFormat& model_format) + const std::string& config_file, const RuntimeOption& custom_option, + const ModelFormat& model_format) : preprocessor_(config_file) { valid_cpu_backends = {Backend::PDINFER}; valid_gpu_backends = {Backend::PDINFER}; @@ -41,29 +41,38 @@ bool Petr::Initialize() { return true; } -bool Petr::Predict(const cv::Mat& im, PerceptionResult* result) { - std::vector results; - if (!BatchPredict({im}, &results)) { - return false; - } - if (results.size()) { - *result = std::move(results[0]); - } - return true; +bool Petr::Predict(const cv::Mat& images, PerceptionResult* results) { + FDERROR << "Petr inference only support 6(V1) or 12(V2) images" << std::endl; + return false; } bool Petr::BatchPredict(const std::vector& images, - std::vector* results) { + std::vector* results) { + if ((images.size() != 6) && (images.size() != 12)) { + FDERROR << "Petr only support 6(V1) or 12(V2) images"; + return false; + } std::vector fd_images = WrapMat(images); if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } - + + // Note: un-commented the codes below to show the debug info. + // reused_input_tensors_[0].PrintInfo(); + // reused_input_tensors_[1].PrintInfo(); + // reused_input_tensors_[2].PrintInfo(); + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; reused_input_tensors_[1].name = InputInfoOfRuntime(1).name; - reused_input_tensors_[2].name = InputInfoOfRuntime(2).name; + if (images.size() == 12) { + // for Petr V2 timestamp + reused_input_tensors_[2].name = InputInfoOfRuntime(2).name; + } else { + // for Petr V1 + reused_input_tensors_.pop_back(); + } if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { FDERROR << "Failed to inference by runtime." << std::endl; diff --git a/fastdeploy/vision/perception/paddle3d/petr/preprocessor.cc b/fastdeploy/vision/perception/paddle3d/petr/preprocessor.cc index 6c3b4d5ec..61129dd52 100644 --- a/fastdeploy/vision/perception/paddle3d/petr/preprocessor.cc +++ b/fastdeploy/vision/perception/paddle3d/petr/preprocessor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/vision/perception/paddle3d/petr/preprocessor.h" + #include #include "fastdeploy/function/concat.h" @@ -31,78 +32,20 @@ PetrPreprocessor::PetrPreprocessor(const std::string& config_file) { bool PetrPreprocessor::BuildPreprocessPipelineFromConfig() { processors_.clear(); - YAML::Node cfg; - try { - cfg = YAML::LoadFile(config_file_); - } catch (YAML::BadFile& e) { - FDERROR << "Failed to load yaml file " << config_file_ - << ", maybe you should check this file." 
<< std::endl; - return false; - } - // read for preprocess - bool has_permute = false; - for (const auto& op : cfg["Preprocess"]) { - std::string op_name = op["type"].as(); - if (op_name == "NormalizeImage") { - auto mean = op["mean"].as>(); - auto std = op["std"].as>(); - bool is_scale = true; - if (op["is_scale"]) { - is_scale = op["is_scale"].as(); - } - std::string norm_type = "mean_std"; - if (op["norm_type"]) { - norm_type = op["norm_type"].as(); - } - if (norm_type != "mean_std") { - std::fill(mean.begin(), mean.end(), 0.0); - std::fill(std.begin(), std.end(), 1.0); - } - mean_ = mean; - std_ = std; - } else if (op_name == "Resize") { - bool keep_ratio = op["keep_ratio"].as(); - auto target_size = op["target_size"].as>(); - int interp = op["interp"].as(); - FDASSERT(target_size.size() == 2, - "Require size of target_size be 2, but now it's %lu.", - target_size.size()); - if (!keep_ratio) { - int width = target_size[0]; - int height = target_size[1]; - processors_.push_back( - std::make_shared(width, height, -1.0, -1.0, interp, false)); - } else { - int min_target_size = std::min(target_size[0], target_size[1]); - int max_target_size = std::max(target_size[0], target_size[1]); - std::vector max_size; - if (max_target_size > 0) { - max_size.push_back(max_target_size); - max_size.push_back(max_target_size); - } - processors_.push_back(std::make_shared( - min_target_size, interp, true, max_size)); - } - } else if (op_name == "Permute") { - // Do nothing, do permute as the last operation - has_permute = true; - continue; - } else { - FDERROR << "Unexcepted preprocess operator: " << op_name << "." - << std::endl; - return false; - } - } - if (!disable_permute_) { - if (has_permute) { - // permute = cast + HWC2CHW - processors_.push_back(std::make_shared("float")); - processors_.push_back(std::make_shared()); - } - } + processors_.push_back(std::make_shared(800, 450)); + processors_.push_back(std::make_shared(0, 130, 800, 320)); + + std::vector mean{103.530, 116.280, 123.675}; + std::vector std{57.375, 57.120, 58.395}; + bool scale = false; + processors_.push_back(std::make_shared(mean, std, scale)); + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + + // Fusion will improve performance + FuseTransforms(&processors_); - input_k_data_ = cfg["k_data"].as>(); return true; } @@ -119,16 +62,16 @@ bool PetrPreprocessor::Apply(FDMatBatch* image_batch, } // There are 3 outputs, image, k_data, timestamp outputs->resize(3); - int batch = static_cast(image_batch->mats->size()); + int num_cams = static_cast(image_batch->mats->size()); // Allocate memory for k_data - (*outputs)[1].Resize({1, batch, 4, 4}, FDDataType::FP32); + (*outputs)[1].Resize({1, num_cams, 4, 4}, FDDataType::FP32); // Allocate memory for image_data - (*outputs)[0].Resize({1, batch, 3, 320, 800}, FDDataType::FP32); + (*outputs)[0].Resize({1, num_cams, 3, 320, 800}, FDDataType::FP32); // Allocate memory for timestamp - (*outputs)[2].Resize({1, batch}, FDDataType::FP32); + (*outputs)[2].Resize({1, num_cams}, FDDataType::FP32); auto* image_ptr = reinterpret_cast((*outputs)[0].MutableData()); @@ -144,53 +87,28 @@ bool PetrPreprocessor::Apply(FDMatBatch* image_batch, << processors_[j]->Name() << "." 
<< std::endl; return false; } - if (processors_[j]->Name() == "Resize") { - // crop and normalize after Resize - auto img = *(mat->GetOpenCVMat()); - cv::Mat crop_img = img(cv::Range(130, 450), cv::Range(0, 800)); - Normalize(&crop_img, mean_, std_, scale_); - FDMat fd_mat = WrapMat(crop_img); - image_batch->mats->at(i) = fd_mat; - } } } - for (int i = 0; i < batch / 2 * 4 * 4; ++i) { - input_k_data_.emplace_back(input_k_data_[i]); + for (int i = 0; i < num_cams / 2 * 4 * 4; ++i) { + input_k_data_.push_back(input_k_data_[i]); } + memcpy(k_data_ptr, input_k_data_.data(), num_cams * 16 * sizeof(float)); - memcpy(k_data_ptr, input_k_data_.data(), batch * 16 * sizeof(float)); - - std::vector timestamp(batch, 0.0f); - for (int i = batch / 2; i < batch; ++i) { + std::vector timestamp(num_cams, 0.0f); + for (int i = num_cams / 2; i < num_cams; ++i) { timestamp[i] = 1.0f; } - memcpy(timestamp_ptr, timestamp.data(), batch * sizeof(float)); + memcpy(timestamp_ptr, timestamp.data(), num_cams * sizeof(float)); - FDTensor* tensor = image_batch->Tensor(); + FDTensor* tensor = image_batch->Tensor(); // [num_cams,3,320,800] + tensor->ExpandDim(0); // [num_cams,3,320,800] -> [1,num_cams,3,320,800] (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), tensor->device, tensor->device_id); return true; } -void PetrPreprocessor::Normalize(cv::Mat* im, const std::vector& mean, - const std::vector& std, float& scale) { - if (scale) { - (*im).convertTo(*im, CV_32FC3, scale); - } - for (int h = 0; h < im->rows; h++) { - for (int w = 0; w < im->cols; w++) { - im->at(h, w)[0] = - (im->at(h, w)[0] - mean[0]) / std[0]; - im->at(h, w)[1] = - (im->at(h, w)[1] - mean[1]) / std[1]; - im->at(h, w)[2] = - (im->at(h, w)[2] - mean[2]) / std[2]; - } - } -} - } // namespace perception } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/perception/paddle3d/petr/preprocessor.h b/fastdeploy/vision/perception/paddle3d/petr/preprocessor.h index 381c81abe..320140026 100755 --- a/fastdeploy/vision/perception/paddle3d/petr/preprocessor.h +++ b/fastdeploy/vision/perception/paddle3d/petr/preprocessor.h @@ -41,9 +41,6 @@ class FASTDEPLOY_DECL PetrPreprocessor : public ProcessorManager { */ bool Apply(FDMatBatch* image_batch, std::vector* outputs); - void Normalize(cv::Mat *im, const std::vector &mean, - const std::vector &std, float &scale); - protected: bool BuildPreprocessPipelineFromConfig(); std::vector> processors_; @@ -58,7 +55,31 @@ class FASTDEPLOY_DECL PetrPreprocessor : public ProcessorManager { std::vector mean_; std::vector std_; - std::vector input_k_data_; + std::vector input_k_data_{ + -1.40307297e-03, 9.07780395e-06, 4.84838307e-01, -5.43047376e-02, + -1.40780103e-04, 1.25770375e-05, 1.04126692e+00, 7.67668605e-01, + -1.02884378e-05, -1.41007011e-03, 1.02823459e-01, -3.07415128e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + -9.39000631e-04, -7.65239349e-07, 1.14073277e+00, 4.46270645e-01, + 1.04998052e-03, 1.91798881e-05, 2.06218868e-01, 7.42717385e-01, + 1.48074005e-05, -1.40855671e-03, 7.45946690e-02, -3.16081315e-01, + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + -7.0699735e-04, 4.2389297e-07, -5.5183989e-01, -5.3276348e-01, + -1.2281288e-03, 2.5626015e-05, 1.0212017e+00, 6.1102939e-01, + -2.2421273e-05, -1.4170362e-03, 9.3639769e-02, -3.0863306e-01, + 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00, + 2.2227580e-03, 2.5312484e-06, -9.7261822e-01, 9.0684637e-02, + 1.9360810e-04, 2.1347081e-05, -1.0779887e+00, 
-7.9227984e-01,
+      4.3742721e-06, -2.2310747e-03,  1.0842450e-01, -2.9406491e-01,
+      0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  1.0000000e+00,
+      5.97175560e-04, -5.88774265e-06, -1.15893924e+00, -4.49921310e-01,
+      -1.28312141e-03, 3.58297058e-07,  1.48300052e-01,  1.14334166e-01,
+      -2.80917516e-06, -1.41527120e-03, 8.37693438e-02, -2.36765608e-01,
+      0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  1.00000000e+00,
+      3.6048229e-04,  3.8333174e-06,  7.9871160e-01,  4.3321830e-01,
+      1.3671946e-03,  6.7484652e-06, -8.4722507e-01,  1.9411178e-01,
+      7.5027779e-06, -1.4139183e-03,  8.2083985e-02, -2.4505949e-01,
+      0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  1.0000000e+00};
 };

 }  // namespace perception
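
---

Reviewer note on the new PETR preprocessing: the patch replaces the YAML-driven pipeline with a fixed sequence (Resize to 800x450, Crop(0, 130, 800, 320), Normalize with mean {103.530, 116.280, 123.675} / std {57.375, 57.120, 58.395} and scale = false, Cast("float"), HWC2CHW). Below is a minimal standalone sketch of that sequence using plain OpenCV, not the FastDeploy processor API; the function name, buffer layout, and BGR channel order are illustrative assumptions.

```cpp
// Minimal sketch of the hardcoded PETR image preprocessing (assumptions noted above).
#include <opencv2/opencv.hpp>
#include <vector>

std::vector<float> PreprocessPetrImage(const cv::Mat& bgr) {
  // Resize the source camera image to 800 (w) x 450 (h).
  cv::Mat resized;
  cv::resize(bgr, resized, cv::Size(800, 450));

  // Crop the 800x320 band starting at (x = 0, y = 130), matching Crop(0, 130, 800, 320).
  cv::Mat cropped = resized(cv::Rect(0, 130, 800, 320)).clone();
  cropped.convertTo(cropped, CV_32FC3);  // Cast("float"); scale = false, so no 1/255

  // Per-channel normalization constants from the patch (assumed BGR order).
  const float mean[3] = {103.530f, 116.280f, 123.675f};
  const float stdv[3] = {57.375f, 57.120f, 58.395f};

  // Normalize and repack HWC -> CHW into a flat [3, 320, 800] buffer
  // (Normalize + HWC2CHW in the new pipeline).
  std::vector<float> chw(3 * 320 * 800);
  for (int c = 0; c < 3; ++c) {
    for (int h = 0; h < 320; ++h) {
      for (int w = 0; w < 800; ++w) {
        float v = cropped.at<cv::Vec3f>(h, w)[c];
        chw[c * 320 * 800 + h * 800 + w] = (v - mean[c]) / stdv[c];
      }
    }
  }
  return chw;
}
```

On the input contract after this patch: `Petr::BatchPredict` now accepts exactly 6 images (PETRv1) or 12 images (PETRv2) and fails otherwise, and the single-image `Petr::Predict` overload always returns false. For V2, the preprocessor reuses the first six built-in camera matrices (`input_k_data_`) for the second set of six images and sets their timestamps to 1.0 (the first six get 0.0); for V1, the timestamp tensor is dropped before inference.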