Refine code structure (#89)

* refine code structure

* refine code structure
Jason
2022-08-10 10:50:22 +08:00
committed by GitHub
parent c7d37b6732
commit 22ca63982b
333 changed files with 1 addition and 37,500 deletions

View File

@@ -19,7 +19,7 @@ option(CSRCS_DIR_NAME "Name of source code directory")
option(LIBRARY_NAME "Name of build library name")
option(PY_LIBRARY_NAME "Name of build python library name")
if(NOT CSRCS_DIR_NAME)
set(CSRCS_DIR_NAME "csrcs")
set(CSRCS_DIR_NAME "csrc")
endif()
if(NOT LIBRARY_NAME)
set(LIBRARY_NAME "fastdeploy")
@@ -55,10 +55,6 @@ option(ENABLE_FDTENSOR_FUNC "Whether to compile with function of FDTensor." OFF)
option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
option(ENABLE_DEBUG "Whether to enable print debug information, this may reduce performance." OFF)
# Whether to build fastdeploy with vision/text/... examples, only for testing.
option(WITH_VISION_EXAMPLES "Whether to build fastdeploy with vision examples" OFF)
option(WITH_TEXT_EXAMPLES "Whether to build fastdeploy with text examples" OFF)
# config GIT_URL with github mirrors to speed up dependent repos clone
option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL})
if(NOT GIT_URL)
@@ -102,19 +98,6 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
include_directories(${HEAD_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
# ENABLE_VISION and ENABLE_VISION_VISUALIZE must be ON if vision examples are enabled.
message(STATUS "Found WITH_VISION_EXAMPLES ON, so force ENABLE_VISION and ENABLE_VISION_VISUALIZE ON")
set(ENABLE_VISION ON CACHE BOOL "force to enable vision models usage" FORCE)
set(ENABLE_VISION_VISUALIZE ON CACHE BOOL "force to enable visualize vision model result toolbox" FORCE)
endif()
if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
# ENABLE_TEXT must be ON if text examples are enabled.
message(STATUS "Found WITH_TEXT_EXAMPLES ON, so force ENABLE_TEXT ON")
set(ENABLE_TEXT ON CACHE BOOL "force to enable text models usage" FORCE)
endif()
add_definitions(-DFASTDEPLOY_LIB)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
@@ -277,22 +260,6 @@ if(MSVC)
endif()
target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS})
# add examples after preparing include paths for third-party dependencies
if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
add_definitions(-DWITH_VISION_EXAMPLES)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin)
add_subdirectory(examples)
endif()
if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
add_definitions(-DWITH_TEXT_EXAMPLES)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin)
# Avoid calling add_subdirectory repeatedly
if (NOT WITH_VISION_EXAMPLES)
add_subdirectory(examples)
endif()
endif()
if (WITH_TESTING AND EXISTS ${PROJECT_SOURCE_DIR}/tests)
add_definitions(-DWITH_TESTING)
include(external/gtest.cmake)

View File

@@ -1,49 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/common/multiclass_nms.h"
#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
struct TensorInfo {
std::string name;
std::vector<int> shape;
FDDataType dtype;
};
class BaseBackend {
public:
bool initialized_ = false;
BaseBackend() {}
virtual ~BaseBackend() = default;
virtual bool Initialized() const { return initialized_; }
virtual int NumInputs() const = 0;
virtual int NumOutputs() const = 0;
virtual TensorInfo GetInputInfo(int index) = 0;
virtual TensorInfo GetOutputInfo(int index) = 0;
virtual bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) = 0;
};
} // namespace fastdeploy
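
A minimal sketch of driving a model through the BaseBackend interface declared above. Only the methods shown in this header are assumed; the backend pointer and the prepared inputs are placeholders for a concrete implementation such as the ORT or Paddle backends later in this commit.

// Sketch: run inference through any BaseBackend implementation.
#include <iostream>
#include <vector>
#include "fastdeploy/backends/backend.h"

bool RunOnce(fastdeploy::BaseBackend* backend,
             std::vector<fastdeploy::FDTensor>& inputs) {
  if (!backend->Initialized()) {
    std::cerr << "Backend is not initialized." << std::endl;
    return false;
  }
  // Inspect the model signature before running.
  for (int i = 0; i < backend->NumInputs(); ++i) {
    fastdeploy::TensorInfo info = backend->GetInputInfo(i);
    std::cout << "input[" << i << "]: " << info.name << std::endl;
  }
  std::vector<fastdeploy::FDTensor> outputs;
  return backend->Infer(inputs, &outputs);
}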

View File

@@ -1,224 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/common/multiclass_nms.h"
#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace backend {
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
void GetMaxScoreIndex(const float* scores, const int& score_size,
const float& threshold, const int& top_k,
std::vector<std::pair<float, int>>* sorted_indices) {
for (size_t i = 0; i < score_size; ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
float BBoxArea(const float* box, const bool& normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return 0.f;
} else {
const float w = box[2] - box[0];
const float h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
float JaccardOverlap(const float* box1, const float* box2,
const bool& normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return 0.f;
} else {
const float inter_xmin = std::max(box1[0], box2[0]);
const float inter_ymin = std::max(box1[1], box2[1]);
const float inter_xmax = std::min(box1[2], box2[2]);
const float inter_ymax = std::min(box1[3], box2[3]);
float norm = normalized ? 0.0f : 1.0f;
float inter_w = inter_xmax - inter_xmin + norm;
float inter_h = inter_ymax - inter_ymin + norm;
const float inter_area = inter_w * inter_h;
const float bbox1_area = BBoxArea(box1, normalized);
const float bbox2_area = BBoxArea(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
void MultiClassNMS::FastNMS(const float* boxes, const float* scores,
const int& num_boxes,
std::vector<int>* keep_indices) {
std::vector<std::pair<float, int>> sorted_indices;
GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
&sorted_indices);
float adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (size_t k = 0; k < keep_indices->size(); ++k) {
if (!keep) {
break;
}
const int kept_idx = (*keep_indices)[k];
float overlap =
JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
keep = overlap <= adaptive_threshold;
}
if (keep) {
keep_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && nms_eta < 1.0 && adaptive_threshold > 0.5) {
adaptive_threshold *= nms_eta;
}
}
}
int MultiClassNMS::NMSForEachSample(
const float* boxes, const float* scores, int num_boxes, int num_classes,
std::map<int, std::vector<int>>* keep_indices) {
for (int i = 0; i < num_classes; ++i) {
if (i == background_label) {
continue;
}
const float* score_for_class_i = scores + i * num_boxes;
FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i]));
}
int num_det = 0;
for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) {
num_det += iter->second.size();
}
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *keep_indices) {
int label = it.first;
const float* current_score = scores + label * num_boxes;
auto& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
score_index_pairs.push_back(
std::make_pair(current_score[idx], std::make_pair(label, idx)));
}
}
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*keep_indices);
num_det = keep_top_k;
}
return num_det;
}
void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data,
const std::vector<int64_t>& boxes_dim,
const std::vector<int64_t>& scores_dim) {
int score_size = scores_dim.size();
int64_t batch_size = scores_dim[0];
int64_t box_dim = boxes_dim[2];
int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0;
FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
std::to_string(score_size) + ".");
FDASSERT(boxes_dim[2] == 4,
"Require the 3-dimension of input boxes be 4, but now it's " +
std::to_string(boxes_dim[2]) + ".");
out_num_rois_data.resize(batch_size);
std::vector<std::map<int, std::vector<int>>> all_indices;
for (size_t i = 0; i < batch_size; ++i) {
std::map<int, std::vector<int>> indices; // indices kept for each class
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr,
boxes_dim[1], scores_dim[1], &indices);
num_nmsed_out += num;
out_num_rois_data[i] = num;
all_indices.emplace_back(indices);
}
std::vector<int64_t> out_box_dims = {num_nmsed_out, 6};
std::vector<int64_t> out_index_dims = {num_nmsed_out, 1};
if (num_nmsed_out == 0) {
for (size_t i = 0; i < batch_size; ++i) {
out_num_rois_data[i] = 0;
}
return;
}
out_box_data.resize(num_nmsed_out * 6);
out_index_data.resize(num_nmsed_out);
int count = 0;
for (size_t i = 0; i < batch_size; ++i) {
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
for (const auto& it : all_indices[i]) {
int label = it.first;
const auto& indices = it.second;
const float* current_scores_class_ptr =
current_scores_ptr + label * scores_dim[2];
for (size_t j = 0; j < indices.size(); ++j) {
int start = count * 6;
out_box_data[start] = label;
out_box_data[start + 1] = current_scores_class_ptr[indices[j]];
out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4];
out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1];
out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2];
out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3];
out_index_data[count] = i * boxes_dim[1] + indices[j];
count += 1;
}
}
}
}
} // namespace backend
} // namespace fastdeploy

View File

@@ -1,45 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
namespace fastdeploy {
namespace backend {
struct MultiClassNMS {
int64_t background_label = -1;
int64_t keep_top_k = -1;
float nms_eta;
float nms_threshold = 0.7;
int64_t nms_top_k;
bool normalized;
float score_threshold;
std::vector<int32_t> out_num_rois_data;
std::vector<int32_t> out_index_data;
std::vector<float> out_box_data;
void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
std::vector<int>* keep_indices);
int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
int num_classes,
std::map<int, std::vector<int>>* keep_indices);
void Compute(const float* boxes, const float* scores,
const std::vector<int64_t>& boxes_dim,
const std::vector<int64_t>& scores_dim);
};
} // namespace backend
} // namespace fastdeploy
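
A hedged usage sketch for the MultiClassNMS struct above. The parameter values and the tiny boxes/scores buffers are invented for illustration; only the fields and the Compute() signature declared in this header are assumed. Boxes are laid out as [batch, num_boxes, 4] and scores as [batch, num_classes, num_boxes], matching the checks in the implementation.

#include <iostream>
#include <vector>
#include "fastdeploy/backends/common/multiclass_nms.h"

int main() {
  fastdeploy::backend::MultiClassNMS nms;
  nms.background_label = -1;
  nms.keep_top_k = 100;
  nms.nms_eta = 1.0f;
  nms.nms_threshold = 0.5f;
  nms.nms_top_k = 1000;
  nms.normalized = true;
  nms.score_threshold = 0.25f;

  // One sample, two boxes, one class (illustrative values).
  std::vector<float> boxes = {0, 0, 10, 10, 1, 1, 11, 11};
  std::vector<float> scores = {0.9f, 0.8f};
  nms.Compute(boxes.data(), scores.data(), {1, 2, 4}, {1, 1, 2});

  // Kept detections land in out_box_data as [label, score, x1, y1, x2, y2].
  std::cout << "kept " << nms.out_num_rois_data[0] << " boxes" << std::endl;
  return 0;
}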

View File

@@ -1,261 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NON_64_PLATFORM
#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
struct OrtTensorDimensions : std::vector<int64_t> {
OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
std::vector<int64_t>::operator=(ort.GetTensorShape(info));
ort.ReleaseTensorTypeAndShapeInfo(info);
}
};
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
void GetMaxScoreIndex(const float* scores, const int& score_size,
const float& threshold, const int& top_k,
std::vector<std::pair<float, int>>* sorted_indices) {
for (size_t i = 0; i < score_size; ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
float BBoxArea(const float* box, const bool& normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return 0.f;
} else {
const float w = box[2] - box[0];
const float h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
float JaccardOverlap(const float* box1, const float* box2,
const bool& normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return 0.f;
} else {
const float inter_xmin = std::max(box1[0], box2[0]);
const float inter_ymin = std::max(box1[1], box2[1]);
const float inter_xmax = std::min(box1[2], box2[2]);
const float inter_ymax = std::min(box1[3], box2[3]);
float norm = normalized ? 0.0f : 1.0f;
float inter_w = inter_xmax - inter_xmin + norm;
float inter_h = inter_ymax - inter_ymin + norm;
const float inter_area = inter_w * inter_h;
const float bbox1_area = BBoxArea(box1, normalized);
const float bbox2_area = BBoxArea(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
void MultiClassNmsKernel::FastNMS(const float* boxes, const float* scores,
const int& num_boxes,
std::vector<int>* keep_indices) {
std::vector<std::pair<float, int>> sorted_indices;
GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
&sorted_indices);
float adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (size_t k = 0; k < keep_indices->size(); ++k) {
if (!keep) {
break;
}
const int kept_idx = (*keep_indices)[k];
float overlap =
JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
keep = overlap <= adaptive_threshold;
}
if (keep) {
keep_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && nms_eta < 1.0 && adaptive_threshold > 0.5) {
adaptive_threshold *= nms_eta;
}
}
}
int MultiClassNmsKernel::NMSForEachSample(
const float* boxes, const float* scores, int num_boxes, int num_classes,
std::map<int, std::vector<int>>* keep_indices) {
for (int i = 0; i < num_classes; ++i) {
if (i == background_label) {
continue;
}
const float* score_for_class_i = scores + i * num_boxes;
FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i]));
}
int num_det = 0;
for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) {
num_det += iter->second.size();
}
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *keep_indices) {
int label = it.first;
const float* current_score = scores + label * num_boxes;
auto& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
score_index_pairs.push_back(
std::make_pair(current_score[idx], std::make_pair(label, idx)));
}
}
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*keep_indices);
num_det = keep_top_k;
}
return num_det;
}
void MultiClassNmsKernel::Compute(OrtKernelContext* context) {
const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
const float* boxes_data =
reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
const float* scores_data =
reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
OrtTensorDimensions boxes_dim(ort_, boxes);
OrtTensorDimensions scores_dim(ort_, scores);
int score_size = scores_dim.size();
int64_t batch_size = scores_dim[0];
int64_t box_dim = boxes_dim[2];
int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0;
FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
std::to_string(score_size) + ".");
FDASSERT(boxes_dim[2] == 4,
"Require the 3-dimension of input boxes be 4, but now it's " +
std::to_string(boxes_dim[2]) + ".");
std::vector<int64_t> out_num_rois_dims = {batch_size};
OrtValue* out_num_rois = ort_.KernelContext_GetOutput(
context, 2, out_num_rois_dims.data(), out_num_rois_dims.size());
int32_t* out_num_rois_data = ort_.GetTensorMutableData<int32_t>(out_num_rois);
std::vector<std::map<int, std::vector<int>>> all_indices;
for (size_t i = 0; i < batch_size; ++i) {
std::map<int, std::vector<int>> indices; // indices kept for each class
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr,
boxes_dim[1], scores_dim[1], &indices);
num_nmsed_out += num;
out_num_rois_data[i] = num;
all_indices.emplace_back(indices);
}
std::vector<int64_t> out_box_dims = {num_nmsed_out, 6};
std::vector<int64_t> out_index_dims = {num_nmsed_out, 1};
OrtValue* out_box = ort_.KernelContext_GetOutput(
context, 0, out_box_dims.data(), out_box_dims.size());
OrtValue* out_index = ort_.KernelContext_GetOutput(
context, 1, out_index_dims.data(), out_index_dims.size());
if (num_nmsed_out == 0) {
int32_t* out_num_rois_data =
ort_.GetTensorMutableData<int32_t>(out_num_rois);
for (size_t i = 0; i < batch_size; ++i) {
out_num_rois_data[i] = 0;
}
return;
}
float* out_box_data = ort_.GetTensorMutableData<float>(out_box);
int32_t* out_index_data = ort_.GetTensorMutableData<int32_t>(out_index);
int count = 0;
for (size_t i = 0; i < batch_size; ++i) {
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
for (const auto& it : all_indices[i]) {
int label = it.first;
const auto& indices = it.second;
const float* current_scores_class_ptr =
current_scores_ptr + label * scores_dim[2];
for (size_t j = 0; j < indices.size(); ++j) {
int start = count * 6;
out_box_data[start] = label;
out_box_data[start + 1] = current_scores_class_ptr[indices[j]];
out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4];
out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1];
out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2];
out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3];
out_index_data[count] = i * boxes_dim[1] + indices[j];
count += 1;
}
}
}
}
void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo* info) {
background_label =
ort_.KernelInfoGetAttribute<int64_t>(info, "background_label");
keep_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "keep_top_k");
nms_eta = ort_.KernelInfoGetAttribute<float>(info, "nms_eta");
nms_threshold = ort_.KernelInfoGetAttribute<float>(info, "nms_threshold");
nms_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "nms_top_k");
normalized = ort_.KernelInfoGetAttribute<int64_t>(info, "normalized");
score_threshold = ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
}
} // namespace fastdeploy
#endif

View File

@@ -1,81 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#ifndef NON_64_PLATFORM
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
struct MultiClassNmsKernel {
protected:
int64_t background_label = -1;
int64_t keep_top_k = -1;
float nms_eta;
float nms_threshold = 0.7;
int64_t nms_top_k;
bool normalized;
float score_threshold;
Ort::CustomOpApi ort_;
public:
MultiClassNmsKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
: ort_(ort) {
GetAttribute(info);
}
void GetAttribute(const OrtKernelInfo* info);
void Compute(OrtKernelContext* context);
void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
std::vector<int>* keep_indices);
int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
int num_classes,
std::map<int, std::vector<int>>* keep_indices);
};
struct MultiClassNmsOp
: Ort::CustomOpBase<MultiClassNmsOp, MultiClassNmsKernel> {
void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
return new MultiClassNmsKernel(api, info);
}
const char* GetName() const { return "MultiClassNMS"; }
size_t GetInputTypeCount() const { return 2; }
ONNXTensorElementDataType GetInputType(size_t index) const {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
}
size_t GetOutputTypeCount() const { return 3; }
ONNXTensorElementDataType GetOutputType(size_t index) const {
if (index == 0) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
}
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
}
const char* GetExecutionProviderType() const {
return "CPUExecutionProvider";
}
};
} // namespace fastdeploy
#endif
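
For reference, a sketch of registering this custom op with an ONNX Runtime session, mirroring what OrtBackend::InitCustomOperators() does later in this commit. The function name is hypothetical; the CustomOpDomain::Add and SessionOptions::Add calls are the same ones the backend uses, and the "Paddle" domain matches the custom MultiClassNMS node exported by paddle2onnx.

#ifndef NON_64_PLATFORM
#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
#include "onnxruntime_cxx_api.h"  // NOLINT

// Intended as a one-time registration before the session is created.
void RegisterPaddleCustomOps(Ort::SessionOptions* session_options) {
  // Keep the op and domain alive for the lifetime of the session options.
  static fastdeploy::MultiClassNmsOp multiclass_nms_op;
  static Ort::CustomOpDomain domain("Paddle");
  domain.Add(&multiclass_nms_op);
  session_options->Add(domain);
}
#endif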

View File

@@ -1,279 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/ort/ort_backend.h"
#include <memory>
#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
namespace fastdeploy {
std::vector<OrtCustomOp*> OrtBackend::custom_operators_ =
std::vector<OrtCustomOp*>();
void OrtBackend::BuildOption(const OrtBackendOption& option) {
option_ = option;
if (option.graph_optimization_level >= 0) {
session_options_.SetGraphOptimizationLevel(
GraphOptimizationLevel(option.graph_optimization_level));
}
if (option.intra_op_num_threads >= 0) {
session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
}
if (option.inter_op_num_threads >= 0) {
session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
}
if (option.execution_mode >= 0) {
session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
}
if (option.use_gpu) {
auto all_providers = Ort::GetAvailableProviders();
bool support_cuda = false;
std::string providers_msg = "";
for (size_t i = 0; i < all_providers.size(); ++i) {
providers_msg = providers_msg + all_providers[i] + ", ";
if (all_providers[i] == "CUDAExecutionProvider") {
support_cuda = true;
}
}
if (!support_cuda) {
FDWARNING << "Compiled fastdeploy with onnxruntime doesn't "
"support GPU, the available providers are "
<< providers_msg << "will fallback to CPUExecutionProvider."
<< std::endl;
option_.use_gpu = false;
} else {
FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " +
std::to_string(option.gpu_id) + ".");
OrtCUDAProviderOptions cuda_options;
cuda_options.device_id = option.gpu_id;
session_options_.AppendExecutionProvider_CUDA(cuda_options);
}
}
}
bool OrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const OrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
#ifdef ENABLE_PADDLE_FRONTEND
char* model_content_ptr;
int model_content_size = 0;
std::vector<paddle2onnx::CustomOp> custom_ops;
for (auto& item : option.custom_op_info_) {
paddle2onnx::CustomOp op;
strcpy(op.op_name, item.first.c_str());
strcpy(op.export_op_name, item.second.c_str());
custom_ops.emplace_back(op);
}
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, custom_ops.data(),
custom_ops.size())) {
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
<< std::endl;
return false;
}
std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
delete[] model_content_ptr;
model_content_ptr = nullptr;
return InitFromOnnx(onnx_model_proto, option, true);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
<< std::endl;
#endif
return false;
}
bool OrtBackend::InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option,
bool from_memory_buffer) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
InitCustomOperators();
if (from_memory_buffer) {
session_ = {env_, model_file.data(), model_file.size(), session_options_};
} else {
#ifdef _WIN32
session_ = {env_,
std::wstring(model_file.begin(), model_file.end()).c_str(),
session_options_};
#else
session_ = {env_, model_file.c_str(), session_options_};
#endif
}
binding_ = std::make_shared<Ort::IoBinding>(session_);
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
Ort::Allocator allocator(session_, memory_info);
size_t n_inputs = session_.GetInputCount();
for (size_t i = 0; i < n_inputs; ++i) {
auto input_name = session_.GetInputName(i, allocator);
auto type_info = session_.GetInputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type});
allocator.Free(input_name);
}
size_t n_outputs = session_.GetOutputCount();
for (size_t i = 0; i < n_outputs; ++i) {
auto output_name = session_.GetOutputName(i, allocator);
auto type_info = session_.GetOutputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type});
Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
binding_->BindOutput(output_name, out_memory_info);
allocator.Free(output_name);
}
initialized_ = true;
return true;
}
void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
const auto info = value.GetTensorTypeAndShapeInfo();
const auto data_type = info.GetElementType();
size_t numel = info.GetElementCount();
tensor->shape = info.GetShape();
if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
tensor->data.resize(numel * sizeof(float));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(float));
tensor->dtype = FDDataType::FP32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
tensor->data.resize(numel * sizeof(int32_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(int32_t));
tensor->dtype = FDDataType::INT32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
tensor->data.resize(numel * sizeof(int64_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(int64_t));
tensor->dtype = FDDataType::INT64;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
tensor->data.resize(numel * sizeof(double));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(double));
tensor->dtype = FDDataType::FP64;
} else {
FDASSERT(false, "Unrecognized data type of " + std::to_string(data_type) +
" while calling OrtBackend::CopyToCpu().");
}
}
bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
// from FDTensor to Ort Inputs
for (size_t i = 0; i < inputs.size(); ++i) {
auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
binding_->BindInput(inputs[i].name.c_str(), ort_value);
}
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info);
}
// Inference with inputs
try {
session_.Run({}, *(binding_.get()));
} catch (const std::exception& e) {
FDERROR << "Failed to Infer: " << e.what() << std::endl;
return false;
}
// Copy result after inference
std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
outputs->resize(ort_outputs.size());
for (size_t i = 0; i < ort_outputs.size(); ++i) {
(*outputs)[i].name = outputs_desc_[i].name;
CopyToCpu(ort_outputs[i], &((*outputs)[i]));
}
return true;
}
TensorInfo OrtBackend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
" should less than the number of inputs:" +
std::to_string(NumInputs()) + ".");
TensorInfo info;
info.name = inputs_desc_[index].name;
info.shape.assign(inputs_desc_[index].shape.begin(),
inputs_desc_[index].shape.end());
info.dtype = GetFdDtype(inputs_desc_[index].dtype);
return info;
}
TensorInfo OrtBackend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index:" + std::to_string(index) +
" should less than the number of outputs:" +
std::to_string(NumOutputs()) + ".");
TensorInfo info;
info.name = outputs_desc_[index].name;
info.shape.assign(outputs_desc_[index].shape.begin(),
outputs_desc_[index].shape.end());
info.dtype = GetFdDtype(outputs_desc_[index].dtype);
return info;
}
void OrtBackend::InitCustomOperators() {
#ifndef NON_64_PLATFORM
if (custom_operators_.size() == 0) {
MultiClassNmsOp* custom_op = new MultiClassNmsOp{};
custom_operators_.push_back(custom_op);
}
for (size_t i = 0; i < custom_operators_.size(); ++i) {
custom_op_domain_.Add(custom_operators_[i]);
}
session_options_.Add(custom_op_domain_);
#endif
}
} // namespace fastdeploy

View File

@@ -1,93 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
struct OrtValueInfo {
std::string name;
std::vector<int64_t> shape;
ONNXTensorElementDataType dtype;
};
struct OrtBackendOption {
// -1 means default
// 0: ORT_DISABLE_ALL
// 1: ORT_ENABLE_BASIC
// 2: ORT_ENABLE_EXTENDED
// 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert)
int graph_optimization_level = -1;
int intra_op_num_threads = -1;
int inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL
// 1: ORT_PARALLEL
int execution_mode = -1;
bool use_gpu = false;
int gpu_id = 0;
// internal parameter, may be removed in the next version
bool remove_multiclass_nms_ = false;
std::map<std::string, std::string> custom_op_info_;
};
class OrtBackend : public BaseBackend {
public:
OrtBackend() {}
virtual ~OrtBackend() = default;
void BuildOption(const OrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const OrtBackendOption& option = OrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option = OrtBackendOption(),
bool from_memory_buffer = false);
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
static std::vector<OrtCustomOp*> custom_operators_;
void InitCustomOperators();
private:
Ort::Env env_;
Ort::Session session_{nullptr};
Ort::SessionOptions session_options_;
std::shared_ptr<Ort::IoBinding> binding_;
std::vector<OrtValueInfo> inputs_desc_;
std::vector<OrtValueInfo> outputs_desc_;
#ifndef NON_64_PLATFORM
Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
#endif
OrtBackendOption option_;
void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
};
} // namespace fastdeploy
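
A hedged end-to-end sketch of using OrtBackend as declared above. The model path is a placeholder and the inputs are left empty; only the option fields and methods declared in this header are assumed.

#include <iostream>
#include <vector>
#include "fastdeploy/backends/ort/ort_backend.h"

int main() {
  fastdeploy::OrtBackendOption option;
  option.graph_optimization_level = 99;  // ORT_ENABLE_ALL
  option.intra_op_num_threads = 4;
  option.use_gpu = false;

  fastdeploy::OrtBackend backend;
  // "model.onnx" is a placeholder path to an ONNX model on disk.
  if (!backend.InitFromOnnx("model.onnx", option, false)) {
    std::cerr << "Failed to initialize OrtBackend." << std::endl;
    return -1;
  }
  std::cout << "inputs: " << backend.NumInputs()
            << ", outputs: " << backend.NumOutputs() << std::endl;

  // Inputs would be filled with real data whose names match
  // GetInputInfo(i).name; with an empty vector Infer() reports a mismatch.
  std::vector<fastdeploy::FDTensor> inputs, outputs;
  bool ok = backend.Infer(inputs, &outputs);
  return ok ? 0 : -1;
}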

View File

@@ -1,67 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype) {
if (fd_dtype == FDDataType::FP32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
} else if (fd_dtype == FDDataType::FP64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
} else if (fd_dtype == FDDataType::INT32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
} else if (fd_dtype == FDDataType::INT64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
}
FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "."
<< std::endl;
return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
}
FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype) {
if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
return FDDataType::FP32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
return FDDataType::FP64;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
return FDDataType::INT32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
return FDDataType::INT64;
}
FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl;
return FDDataType::FP32;
}
Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda) {
FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
"Only support tensor which device is CPU or GPU for OrtBackend.");
if (tensor.device == Device::GPU && is_backend_cuda) {
Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
auto ort_value = Ort::Value::CreateTensor(
memory_info, tensor.MutableData(), tensor.Nbytes(), tensor.shape.data(),
tensor.shape.size(), GetOrtDtype(tensor.dtype));
return ort_value;
}
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
auto ort_value = Ort::Value::CreateTensor(
memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(),
tensor.shape.size(), GetOrtDtype(tensor.dtype));
return ort_value;
}
} // namespace fastdeploy

View File

@@ -1,39 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
// Convert FDDataType to OrtDataType
ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype);
// Convert OrtDataType to FDDataType
FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype);
// Create Ort::Value
// is_backend_cuda specifies whether onnxruntime uses CUDAExecutionProvider
// When is_backend_cuda = true and tensor.device = Device::GPU,
// the CUDA data in the tensor is shared directly with the OrtValue
Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda = false);
} // namespace fastdeploy
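
A minimal sketch exercising the dtype converters declared above: FP32 should survive a round trip between FDDataType and ONNXTensorElementDataType. CreateOrtValue() is the zero-copy path described in the comment and is not exercised here.

#include <cassert>
#include "fastdeploy/backends/ort/utils.h"

int main() {
  // FDDataType::FP32 maps to the ONNX float element type and back.
  ONNXTensorElementDataType ort_dtype =
      fastdeploy::GetOrtDtype(fastdeploy::FDDataType::FP32);
  assert(ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
  assert(fastdeploy::GetFdDtype(ort_dtype) == fastdeploy::FDDataType::FP32);
  return 0;
}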

View File

@@ -1,105 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/paddle/paddle_backend.h"
namespace fastdeploy {
void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
if (option.use_gpu) {
config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
} else {
config_.DisableGpu();
if (option.enable_mkldnn) {
config_.EnableMKLDNN();
config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
}
}
config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
}
bool PaddleBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const PaddleBackendOption& option) {
if (initialized_) {
FDERROR << "PaddleBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
config_.SetModel(model_file, params_file);
BuildOption(option);
predictor_ = paddle_infer::CreatePredictor(config_);
std::vector<std::string> input_names = predictor_->GetInputNames();
std::vector<std::string> output_names = predictor_->GetOutputNames();
for (size_t i = 0; i < input_names.size(); ++i) {
auto handle = predictor_->GetInputHandle(input_names[i]);
TensorInfo info;
auto shape = handle->shape();
info.shape.assign(shape.begin(), shape.end());
info.dtype = PaddleDataTypeToFD(handle->type());
info.name = input_names[i];
inputs_desc_.emplace_back(info);
}
for (size_t i = 0; i < output_names.size(); ++i) {
auto handle = predictor_->GetOutputHandle(output_names[i]);
TensorInfo info;
auto shape = handle->shape();
info.shape.assign(shape.begin(), shape.end());
info.dtype = PaddleDataTypeToFD(handle->type());
info.name = output_names[i];
outputs_desc_.emplace_back(info);
}
initialized_ = true;
return true;
}
TensorInfo PaddleBackend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
" should less than the number of inputs:" +
std::to_string(NumInputs()) + ".");
return inputs_desc_[index];
}
TensorInfo PaddleBackend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index:" + std::to_string(index) +
" should less than the number of outputs:" +
std::to_string(NumOutputs()) + ".");
return outputs_desc_[index];
}
bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
for (size_t i = 0; i < inputs.size(); ++i) {
auto handle = predictor_->GetInputHandle(inputs[i].name);
ShareTensorFromCpu(handle.get(), inputs[i]);
}
predictor_->Run();
outputs->resize(outputs_desc_.size());
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
CopyTensorToCpu(handle, &((*outputs)[i]));
}
return true;
}
} // namespace fastdeploy

View File

@@ -1,78 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "paddle_inference_api.h" // NOLINT
namespace fastdeploy {
struct PaddleBackendOption {
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
bool enable_mkldnn = true;
int mkldnn_cache_size = 1;
int cpu_thread_num = 8;
// initial memory size (MB) for GPU
int gpu_mem_init_size = 100;
// gpu device id
int gpu_id = 0;
};
// Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor
void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
// Copy memory data from paddle_infer::Tensor to fastdeploy::FDTensor
void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor);
// Convert data type from paddle inference to fastdeploy
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype);
class PaddleBackend : public BaseBackend {
public:
PaddleBackend() {}
virtual ~PaddleBackend() = default;
void BuildOption(const PaddleBackendOption& option);
bool InitFromPaddle(
const std::string& model_file, const std::string& params_file,
const PaddleBackendOption& option = PaddleBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
private:
paddle_infer::Config config_;
std::shared_ptr<paddle_infer::Predictor> predictor_;
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
};
} // namespace fastdeploy
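
A hedged usage sketch for PaddleBackend as declared above. The model and params paths are placeholders for an exported Paddle inference model; only the declared option fields and methods are assumed.

#include <iostream>
#include <vector>
#include "fastdeploy/backends/paddle/paddle_backend.h"

int main() {
  fastdeploy::PaddleBackendOption option;
  option.enable_mkldnn = true;
  option.cpu_thread_num = 8;

  fastdeploy::PaddleBackend backend;
  // Placeholder paths to an exported Paddle inference model.
  if (!backend.InitFromPaddle("model.pdmodel", "model.pdiparams", option)) {
    std::cerr << "Failed to initialize PaddleBackend." << std::endl;
    return -1;
  }
  for (int i = 0; i < backend.NumInputs(); ++i) {
    std::cout << "input " << i << ": " << backend.GetInputInfo(i).name
              << std::endl;
  }
  return 0;
}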

View File

@@ -1,76 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/paddle/paddle_backend.h"
namespace fastdeploy {
void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor) {
std::vector<int> shape(fd_tensor.shape.begin(), fd_tensor.shape.end());
tensor->Reshape(shape);
if (fd_tensor.dtype == FDDataType::FP32) {
tensor->ShareExternalData(static_cast<const float*>(fd_tensor.Data()),
shape, paddle_infer::PlaceType::kCPU);
return;
} else if (fd_tensor.dtype == FDDataType::INT32) {
tensor->ShareExternalData(static_cast<const int32_t*>(fd_tensor.Data()),
shape, paddle_infer::PlaceType::kCPU);
return;
} else if (fd_tensor.dtype == FDDataType::INT64) {
tensor->ShareExternalData(static_cast<const int64_t*>(fd_tensor.Data()),
shape, paddle_infer::PlaceType::kCPU);
return;
}
FDASSERT(false, "Unexpected data type(" + Str(fd_tensor.dtype) +
") while infer with PaddleBackend.");
}
void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor) {
auto fd_dtype = PaddleDataTypeToFD(tensor->type());
std::vector<int64_t> shape;
auto tmp_shape = tensor->shape();
shape.assign(tmp_shape.begin(), tmp_shape.end());
fd_tensor->Allocate(shape, fd_dtype, tensor->name());
if (fd_tensor->dtype == FDDataType::FP32) {
tensor->CopyToCpu(static_cast<float*>(fd_tensor->MutableData()));
return;
} else if (fd_tensor->dtype == FDDataType::INT32) {
tensor->CopyToCpu(static_cast<int32_t*>(fd_tensor->MutableData()));
return;
} else if (fd_tensor->dtype == FDDataType::INT64) {
tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor->MutableData()));
return;
}
FDASSERT(false, "Unexpected data type(" + Str(fd_tensor->dtype) +
") while infer with PaddleBackend.");
}
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype) {
auto fd_dtype = FDDataType::FP32;
if (dtype == paddle_infer::FLOAT32) {
fd_dtype = FDDataType::FP32;
} else if (dtype == paddle_infer::INT64) {
fd_dtype = FDDataType::INT64;
} else if (dtype == paddle_infer::INT32) {
fd_dtype = FDDataType::INT32;
} else if (dtype == paddle_infer::UINT8) {
fd_dtype = FDDataType::UINT8;
} else {
FDASSERT(false, "Unexpected data type:" + std::to_string(int(dtype)) +
" while call CopyTensorToCpu in PaddleBackend.");
}
return fd_dtype;
}
} // namespace fastdeploy

View File

@@ -1,342 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BATCH_STREAM_H
#define BATCH_STREAM_H
#include "NvInfer.h"
#include "common.h"
#include <algorithm>
#include <stdio.h>
#include <vector>
class IBatchStream {
public:
virtual void reset(int firstBatch) = 0;
virtual bool next() = 0;
virtual void skip(int skipCount) = 0;
virtual float* getBatch() = 0;
virtual float* getLabels() = 0;
virtual int getBatchesRead() const = 0;
virtual int getBatchSize() const = 0;
virtual nvinfer1::Dims getDims() const = 0;
};
class MNISTBatchStream : public IBatchStream {
public:
MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
const std::string& labelsFile,
const std::vector<std::string>& directories)
: mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
//!< We already know the dimensions of MNIST images.
{
readDataFile(locateFile(dataFile, directories));
readLabelsFile(locateFile(labelsFile, directories));
}
void reset(int firstBatch) override { mBatchCount = firstBatch; }
bool next() override {
if (mBatchCount >= mMaxBatches) {
return false;
}
++mBatchCount;
return true;
}
void skip(int skipCount) override { mBatchCount += skipCount; }
float* getBatch() override {
return mData.data() +
(mBatchCount * mBatchSize * samplesCommon::volume(mDims));
}
float* getLabels() override {
return mLabels.data() + (mBatchCount * mBatchSize);
}
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override {
return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
}
private:
void readDataFile(const std::string& dataFilePath) {
std::ifstream file{dataFilePath.c_str(), std::ios::binary};
int magicNumber, numImages, imageH, imageW;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2051 &&
"Magic Number does not match the expected value for an MNIST image "
"set");
// Read number of images and dimensions
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
numImages = samplesCommon::swapEndianness(numImages);
imageH = samplesCommon::swapEndianness(imageH);
imageW = samplesCommon::swapEndianness(imageW);
// The MNIST data is made up of unsigned bytes, so we need to cast to float
// and normalize.
int numElements = numImages * imageH * imageW;
std::vector<uint8_t> rawData(numElements);
file.read(reinterpret_cast<char*>(rawData.data()),
numElements * sizeof(uint8_t));
mData.resize(numElements);
std::transform(rawData.begin(), rawData.end(), mData.begin(),
[](uint8_t val) { return static_cast<float>(val) / 255.f; });
}
void readLabelsFile(const std::string& labelsFilePath) {
std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
int magicNumber, numImages;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2049 &&
"Magic Number does not match the expected value for an MNIST labels "
"file");
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
numImages = samplesCommon::swapEndianness(numImages);
std::vector<uint8_t> rawLabels(numImages);
file.read(reinterpret_cast<char*>(rawLabels.data()),
numImages * sizeof(uint8_t));
mLabels.resize(numImages);
std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
[](uint8_t val) { return static_cast<float>(val); });
}
int mBatchSize{0};
int mBatchCount{
0}; //!< The batch that will be read on the next invocation of next()
int mMaxBatches{0};
Dims mDims{};
std::vector<float> mData{};
std::vector<float> mLabels{};
};
class BatchStream : public IBatchStream {
public:
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::string suffix, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
mSuffix(suffix), mDataDir(directories) {
FILE* file = fopen(
locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
"rb");
ASSERT(file != nullptr);
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
mDims.nbDims = 4; // The number of dimensions.
mDims.d[0] = d[0]; // Batch Size
mDims.d[1] = d[1]; // Channels
mDims.d[2] = d[2]; // Height
mDims.d[3] = d[3]; // Width
ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
mDims.d[3] > 0);
fclose(file);
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::vector<std::string> directories)
: BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}
BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
std::string listFile, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
mListFile(listFile), mDataDir(directories) {
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
// Resets data members
void reset(int firstBatch) override {
mBatchCount = 0;
mFileCount = 0;
mFileBatchPos = mDims.d[0];
skip(firstBatch);
}
// Advance to next batch and return true, or return false if there is no batch
// left.
bool next() override {
if (mBatchCount == mMaxBatches) {
return false;
}
for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
batchPos += csize, mFileBatchPos += csize) {
ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
if (mFileBatchPos == mDims.d[0] && !update()) {
return false;
}
// copy the smaller of: elements left to fulfill the request, or elements
// left in the file buffer.
csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
csize * mImageSize, getBatch() + batchPos * mImageSize);
std::copy_n(getFileLabels() + mFileBatchPos, csize,
getLabels() + batchPos);
}
mBatchCount++;
return true;
}
// Skips the batches
void skip(int skipCount) override {
if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
mFileBatchPos == mDims.d[0]) {
mFileCount += skipCount * mBatchSize / mDims.d[0];
return;
}
int x = mBatchCount;
for (int i = 0; i < skipCount; i++) {
next();
}
mBatchCount = x;
}
float* getBatch() override { return mBatch.data(); }
float* getLabels() override { return mLabels.data(); }
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override { return mDims; }
private:
float* getFileBatch() { return mFileBatch.data(); }
float* getFileLabels() { return mFileLabels.data(); }
bool update() {
if (mListFile.empty()) {
std::string inputFileName = locateFile(
mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
FILE* file = fopen(inputFileName.c_str(), "rb");
if (!file) {
return false;
}
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
mDims.d[3] == d[3]);
size_t readInputCount =
fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
size_t readLabelCount =
fread(getFileLabels(), sizeof(float), mDims.d[0], file);
ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));
fclose(file);
} else {
std::vector<std::string> fNames;
std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
if (!file) {
return false;
}
sample::gLogInfo << "Batch #" << mFileCount << std::endl;
file.seekg(((mBatchCount * mBatchSize)) * 7);
for (int i = 1; i <= mBatchSize; i++) {
std::string sName;
std::getline(file, sName);
sName = sName + ".ppm";
sample::gLogInfo << "Calibrating with file " << sName << std::endl;
fNames.emplace_back(sName);
}
mFileCount++;
const int imageC = 3;
const int imageH = 300;
const int imageW = 300;
std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
fNames.size());
for (uint32_t i = 0; i < fNames.size(); ++i) {
readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
}
std::vector<float> data(samplesCommon::volume(mDims));
const float scale = 2.0 / 255.0;
const float bias = 1.0;
long int volChl = mDims.d[2] * mDims.d[3];
// Normalize input data
for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
i < mBatchSize; ++i) {
for (int c = 0; c < mDims.d[1]; ++c) {
for (int j = 0; j < volChl; ++j) {
data[i * volImg + c * volChl + j] =
scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
}
}
}
std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
}
mFileBatchPos = 0;
return true;
}
int mBatchSize{0};
int mMaxBatches{0};
int mBatchCount{0};
int mFileCount{0};
int mFileBatchPos{0};
int mImageSize{0};
std::vector<float> mBatch; //!< Data for the batch
std::vector<float> mLabels; //!< Labels for the batch
std::vector<float> mFileBatch; //!< List of image files
std::vector<float> mFileLabels; //!< List of label files
std::string mPrefix; //!< Batch file name prefix
std::string mSuffix; //!< Batch file name suffix
nvinfer1::Dims mDims; //!< Input dimensions
std::string mListFile; //!< File name of the list of image names
std::vector<std::string>
mDataDir; //!< Directories where the files can be found
};
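// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). It assumes calibration batches named "batch0.batch",
// "batch1.batch", ... live under the given data directory, which is a
// placeholder path.
inline void exampleIterateBatchStream() {
  BatchStream stream(/*batchSize=*/8, /*maxBatches=*/10, /*prefix=*/"batch",
                     /*directories=*/{"data/int8/"});
  while (stream.next()) {
    const float* batch = stream.getBatch();    // batchSize * C * H * W values
    const float* labels = stream.getLabels();  // batchSize label values
    // Feed `batch`/`labels` to a calibrator or an accuracy check here.
    static_cast<void>(batch);
    static_cast<void>(labels);
  }
}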
#endif

View File

@@ -1 +0,0 @@
exclude_files=.*

View File

@@ -1,118 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H
#include "BatchStream.h"
#include "NvInfer.h"
//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
template <typename TBatchStream> class EntropyCalibratorImpl {
public:
EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
std::string networkName, const char* inputBlobName,
bool readCache = true)
: mStream{stream},
mCalibrationTableName("CalibrationTable" + networkName),
mInputBlobName(inputBlobName), mReadCache(readCache) {
nvinfer1::Dims dims = mStream.getDims();
mInputCount = samplesCommon::volume(dims);
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
mStream.reset(firstBatch);
}
virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }
int getBatchSize() const noexcept { return mStream.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept {
if (!mStream.next()) {
return false;
}
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
mInputCount * sizeof(float), cudaMemcpyHostToDevice));
ASSERT(!strcmp(names[0], mInputBlobName));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) noexcept {
mCalibrationCache.clear();
std::ifstream input(mCalibrationTableName, std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good()) {
std::copy(std::istream_iterator<char>(input),
std::istream_iterator<char>(),
std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? mCalibrationCache.data() : nullptr;
}
void writeCalibrationCache(const void* cache, size_t length) noexcept {
std::ofstream output(mCalibrationTableName, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
private:
TBatchStream mStream;
size_t mInputCount;
std::string mCalibrationTableName;
const char* mInputBlobName;
bool mReadCache{true};
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache;
};
//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
template <typename TBatchStream>
class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
const char* networkName, const char* inputBlobName,
bool readCache = true)
: mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}
int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept override {
return mImpl.getBatch(bindings, names, nbBindings);
}
const void* readCalibrationCache(size_t& length) noexcept override {
return mImpl.readCalibrationCache(length);
}
void writeCalibrationCache(const void* cache,
size_t length) noexcept override {
mImpl.writeCalibrationCache(cache, length);
}
private:
EntropyCalibratorImpl<TBatchStream> mImpl;
};
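// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). The network name "ExampleNet" and the input blob name "data" below
// are placeholders; the calibrator must stay alive until the engine has been
// built.
inline void exampleEnableInt8Calibration(
    nvinfer1::IBuilderConfig& config,
    Int8EntropyCalibrator2<BatchStream>& calibrator) {
  config.setFlag(nvinfer1::BuilderFlag::kINT8);
  config.setInt8Calibrator(&calibrator);
}
// A caller would typically do:
//   BatchStream stream(8, 10, "batch", {"data/int8/"});
//   Int8EntropyCalibrator2<BatchStream> calibrator(stream, 0, "ExampleNet",
//                                                  "data");
//   exampleEnableInt8Calibration(*config, calibrator);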
#endif // ENTROPY_CALIBRATOR_H

View File

@@ -1,115 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ERROR_RECORDER_H
#define ERROR_RECORDER_H
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include <atomic>
#include <cstdint>
#include <exception>
#include <mutex>
#include <vector>
using nvinfer1::ErrorCode;
using nvinfer1::IErrorRecorder;
//!
//! A simple implementation of the IErrorRecorder interface for
//! use by samples. This interface also can be used as a reference
//! implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutexes and atomics in order to make sure that the code
//! works in a multi-threaded environment.
//!
class SampleErrorRecorder : public IErrorRecorder {
using errorPair = std::pair<ErrorCode, std::string>;
using errorStack = std::vector<errorPair>;
public:
SampleErrorRecorder() = default;
virtual ~SampleErrorRecorder() noexcept {}
int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
: (*this)[errorIdx].first;
};
IErrorRecorder::ErrorDesc
getErrorDesc(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
: (*this)[errorIdx].second.c_str();
}
// This class can never overflow since we have dynamic resize via std::vector
// usage.
bool hasOverflowed() const noexcept final { return false; }
// Empty the errorStack.
void clear() noexcept final {
try {
// grab a lock so that there is no addition while clearing.
std::lock_guard<std::mutex> guard(mStackLock);
mErrorStack.clear();
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
};
  //! Simple helper function that returns true if the error stack is empty.
bool empty() const noexcept { return mErrorStack.empty(); }
bool reportError(ErrorCode val,
IErrorRecorder::ErrorDesc desc) noexcept final {
try {
std::lock_guard<std::mutex> guard(mStackLock);
sample::gLogError << "Error[" << static_cast<int32_t>(val)
<< "]: " << desc << std::endl;
mErrorStack.push_back(errorPair(val, desc));
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
// All errors are considered fatal.
return true;
}
// Atomically increment or decrement the ref counter.
IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }
private:
// Simple helper functions.
const errorPair& operator[](size_t index) const noexcept {
return mErrorStack[index];
}
bool invalidIndexCheck(int32_t index) const noexcept {
// By converting signed to unsigned, we only need a single check since
// negative numbers turn into large positive greater than the size.
size_t sIndex = index;
return sIndex >= mErrorStack.size();
}
// Mutex to hold when locking mErrorStack.
std::mutex mStackLock;
// Reference count of the class. Destruction of the class when mRefCount
// is not zero causes undefined behavior.
std::atomic<int32_t> mRefCount{0};
// The error stack that holds the errors recorded by TensorRT.
errorStack mErrorStack;
}; // class SampleErrorRecorder
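// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). The recorder would be attached beforehand with
// builder/runtime/engine->setErrorRecorder(&recorder); afterwards the stack
// can be drained like this.
inline void exampleDumpRecordedErrors(SampleErrorRecorder& recorder) {
  for (int32_t i = 0; i < recorder.getNbErrors(); ++i) {
    sample::gLogError << "Recorded error " << i << ": "
                      << recorder.getErrorDesc(i) << std::endl;
  }
  recorder.clear();
}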
#endif // ERROR_RECORDER_H

View File

@@ -1 +0,0 @@
The code in this directory is sourced from https://github.com/NVIDIA/TensorRT

View File

@@ -1,169 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_ARGS_PARSER_H
#define TENSORRT_ARGS_PARSER_H
#include <string>
#include <vector>
#ifdef _MSC_VER
#include ".\windows\getopt.h"
#else
#include <getopt.h>
#endif
#include <iostream>
namespace samplesCommon {
//!
//! \brief The SampleParams structure groups the basic parameters required by
//! all sample networks.
//!
struct SampleParams {
int32_t batchSize{1}; //!< Number of inputs in a batch
int32_t dlaCore{-1}; //!< Specify the DLA core to run network on.
  bool int8{false}; //!< Allow running the network in Int8 mode.
bool fp16{false}; //!< Allow running the network in FP16 mode.
std::vector<std::string>
dataDirs; //!< Directory paths where sample data files are stored
std::vector<std::string> inputTensorNames;
std::vector<std::string> outputTensorNames;
};
//!
//! \brief The CaffeSampleParams structure groups the additional parameters
//! required by
//! networks that use caffe
//!
struct CaffeSampleParams : public SampleParams {
std::string
prototxtFileName; //!< Filename of prototxt design file of a network
std::string
weightsFileName; //!< Filename of trained weights file of a network
std::string meanFileName; //!< Filename of mean file of a network
};
//!
//! \brief The OnnxSampleParams structure groups the additional parameters
//! required by
//! networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams {
std::string onnxFileName; //!< Filename of ONNX file of a network
};
//!
//! \brief The UffSampleParams structure groups the additional parameters
//! required by
//! networks that use Uff
//!
struct UffSampleParams : public SampleParams {
std::string uffFileName; //!< Filename of uff file of a network
};
//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args {
bool runInInt8{false};
bool runInFp16{false};
bool help{false};
int32_t useDLACore{-1};
int32_t batch{1};
std::vector<std::string> dataDirs;
std::string saveEngine;
std::string loadEngine;
bool useILoop{false};
};
//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return boolean If return value is true, execution can continue, otherwise
//! program should exit
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
while (1) {
int32_t arg;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"datadir", required_argument, 0, 'd'},
{"int8", no_argument, 0, 'i'},
{"fp16", no_argument, 0, 'f'},
{"useILoop", no_argument, 0, 'l'},
{"saveEngine", required_argument, 0, 's'},
{"loadEngine", no_argument, 0, 'o'},
{"useDLACore", required_argument, 0, 'u'},
{"batch", required_argument, 0, 'b'},
{nullptr, 0, nullptr, 0}};
int32_t option_index = 0;
arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
if (arg == -1) {
break;
}
switch (arg) {
case 'h':
args.help = true;
return true;
case 'd':
if (optarg) {
args.dataDirs.push_back(optarg);
} else {
std::cerr << "ERROR: --datadir requires option argument" << std::endl;
return false;
}
break;
case 's':
if (optarg) {
args.saveEngine = optarg;
}
break;
case 'o':
if (optarg) {
args.loadEngine = optarg;
}
break;
case 'i':
args.runInInt8 = true;
break;
case 'f':
args.runInFp16 = true;
break;
case 'l':
args.useILoop = true;
break;
case 'u':
if (optarg) {
args.useDLACore = std::stoi(optarg);
}
break;
case 'b':
if (optarg) {
args.batch = std::stoi(optarg);
}
break;
default:
return false;
}
}
return true;
}
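// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Typical handling at the top of a sample's main(); the default data
// directory below is a placeholder.
inline bool exampleHandleArgs(int32_t argc, char* argv[], Args& args) {
  if (!parseArgs(args, argc, argv)) {
    std::cerr << "Invalid arguments" << std::endl;
    return false;
  }
  if (args.help) {
    std::cout << "Usage: sample [-h] [-d DATADIR] [--int8] [--fp16] "
                 "[--useDLACore=N] [--batch=N]"
              << std::endl;
    return false;
  }
  if (args.dataDirs.empty()) {
    args.dataDirs.push_back("data/samples/");  // placeholder default
  }
  return true;
}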
} // namespace samplesCommon
#endif // TENSORRT_ARGS_PARSER_H

View File

@@ -1,426 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_BUFFERS_H
#define TENSORRT_BUFFERS_H
#include "NvInfer.h"
#include "common.h"
#include "half.h"
#include <cassert>
#include <cuda_runtime_api.h>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <vector>
namespace samplesCommon {
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class
//! handles the allocation,
//! deallocation, querying of buffers on both the device and the host.
//! It can handle data of arbitrary types because it stores byte
//! buffers.
//! The template parameters AllocFunc and FreeFunc are used for the
//! allocation and deallocation of the buffer.
//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
//! and returns bool. ptr is a pointer to where the allocated buffer
//! address should be stored.
//! size is the amount of memory in bytes to allocate.
//! The boolean indicates whether or not the memory allocation was
//! successful.
//! FreeFunc must be a functor that takes in (void* ptr) and returns
//! void.
//! ptr is the allocated buffer address. It must work with nullptr
//! input.
//!
template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
public:
//!
//! \brief Construct an empty buffer.
//!
GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
GenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size), mCapacity(size), mType(type) {
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc();
}
}
GenericBuffer(GenericBuffer&& buf)
: mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
mBuffer(buf.mBuffer) {
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
GenericBuffer& operator=(GenericBuffer&& buf) {
if (this != &buf) {
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data() { return mBuffer; }
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const { return mBuffer; }
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const { return mSize; }
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const {
return this->size() * samplesCommon::getElementSize(mType);
}
//!
  //! \brief Resizes the buffer. Reallocation only happens when the new size
  //! exceeds the current capacity; otherwise only the element count changes.
//!
void resize(size_t newSize) {
mSize = newSize;
if (mCapacity < newSize) {
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims) {
return this->resize(samplesCommon::volume(dims));
}
~GenericBuffer() { freeFn(mBuffer); }
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class DeviceAllocator {
public:
bool operator()(void** ptr, size_t size) const {
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class DeviceFree {
public:
void operator()(void* ptr) const { cudaFree(ptr); }
};
class HostAllocator {
public:
bool operator()(void** ptr, size_t size) const {
*ptr = malloc(size);
return *ptr != nullptr;
}
};
class HostFree {
public:
void operator()(void* ptr) const { free(ptr); }
};
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
//!
//! \brief The ManagedBuffer class groups together a pair of corresponding
//! device and host buffers.
//!
class ManagedBuffer {
public:
DeviceBuffer deviceBuffer;
HostBuffer hostBuffer;
};
//!
//! \brief The BufferManager class handles host and device buffer allocation
//! and deallocation.
//!
//! \details This RAII class handles host and device buffer allocation and
//! deallocation,
//! memcpy between host and device buffers to aid with inference,
//! and debugging dumps to validate inference. The BufferManager class
//! is meant to be
//! used to simplify buffer management and any interactions between
//! buffers and the engine.
//!
class BufferManager {
public:
static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
//!
//! \brief Create a BufferManager for handling buffer interactions with
//! engine.
//!
BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
const int batchSize = 0,
const nvinfer1::IExecutionContext* context = nullptr)
: mEngine(engine), mBatchSize(batchSize) {
// Full Dims implies no batch size.
assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
// Create host and device buffers
for (int i = 0; i < mEngine->getNbBindings(); i++) {
auto dims = context ? context->getBindingDimensions(i)
: mEngine->getBindingDimensions(i);
size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
nvinfer1::DataType type = mEngine->getBindingDataType(i);
int vecDim = mEngine->getBindingVectorizedDim(i);
if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
{
int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
vol *= scalarsPerVec;
}
vol *= samplesCommon::volume(dims);
std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
manBuf->deviceBuffer = DeviceBuffer(vol, type);
manBuf->hostBuffer = HostBuffer(vol, type);
mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
mManagedBuffers.emplace_back(std::move(manBuf));
}
}
//!
//! \brief Returns a vector of device buffers that you can use directly as
//! bindings for the execute and enqueue methods of IExecutionContext.
//!
std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }
//!
//! \brief Returns a vector of device buffers.
//!
const std::vector<void*>& getDeviceBindings() const {
return mDeviceBindings;
}
//!
//! \brief Returns the device buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getDeviceBuffer(const std::string& tensorName) const {
return getBuffer(false, tensorName);
}
//!
//! \brief Returns the host buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getHostBuffer(const std::string& tensorName) const {
return getBuffer(true, tensorName);
}
//!
//! \brief Returns the size of the host and device buffers that correspond to
//! tensorName.
//! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
//!
size_t size(const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return kINVALID_SIZE_VALUE;
return mManagedBuffers[index]->hostBuffer.nbBytes();
}
//!
//! \brief Dump host buffer with specified tensorName to ostream.
//! Prints error message to std::ostream if no such tensor can be
//! found.
//!
void dumpBuffer(std::ostream& os, const std::string& tensorName) {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1) {
os << "Invalid tensor name" << std::endl;
return;
}
void* buf = mManagedBuffers[index]->hostBuffer.data();
size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
size_t rowCount = static_cast<size_t>(
bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
int leadDim = mBatchSize;
int* trailDims = bufDims.d;
int nbDims = bufDims.nbDims;
// Fix explicit Dimension networks
if (!leadDim && nbDims > 0) {
leadDim = bufDims.d[0];
++trailDims;
--nbDims;
}
os << "[" << leadDim;
for (int i = 0; i < nbDims; i++)
os << ", " << trailDims[i];
os << "]" << std::endl;
switch (mEngine->getBindingDataType(index)) {
case nvinfer1::DataType::kINT32:
print<int32_t>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kFLOAT:
print<float>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kHALF:
print<half_float::half>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kINT8:
assert(0 && "Int8 network-level input and output is not supported");
break;
case nvinfer1::DataType::kBOOL:
assert(0 && "Bool network-level input and output are not supported");
break;
}
}
//!
//! \brief Templated print function that dumps buffers of arbitrary type to
//! std::ostream.
//! rowCount parameter controls how many elements are on each line.
//! A rowCount of 1 means that there is only 1 element on each line.
//!
template <typename T>
void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
assert(rowCount != 0);
assert(bufSize % sizeof(T) == 0);
T* typedBuf = static_cast<T*>(buf);
size_t numItems = bufSize / sizeof(T);
for (int i = 0; i < static_cast<int>(numItems); i++) {
// Handle rowCount == 1 case
if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
os << typedBuf[i] << std::endl;
else if (rowCount == 1)
os << typedBuf[i];
// Handle rowCount > 1 case
else if (i % rowCount == 0)
os << typedBuf[i];
else if (i % rowCount == rowCount - 1)
os << " " << typedBuf[i] << std::endl;
else
os << " " << typedBuf[i];
}
}
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! synchronously.
//!
void copyInputToDevice() { memcpyBuffers(true, false, false); }
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! synchronously.
//!
void copyOutputToHost() { memcpyBuffers(false, true, false); }
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! asynchronously.
//!
void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(true, false, true, stream);
}
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! asynchronously.
//!
void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(false, true, true, stream);
}
~BufferManager() = default;
private:
void* getBuffer(const bool isHost, const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return nullptr;
return (isHost ? mManagedBuffers[index]->hostBuffer.data()
: mManagedBuffers[index]->deviceBuffer.data());
}
void memcpyBuffers(const bool copyInput, const bool deviceToHost,
const bool async, const cudaStream_t& stream = 0) {
for (int i = 0; i < mEngine->getNbBindings(); i++) {
void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
: mManagedBuffers[i]->deviceBuffer.data();
const void* srcPtr = deviceToHost
? mManagedBuffers[i]->deviceBuffer.data()
: mManagedBuffers[i]->hostBuffer.data();
const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
const cudaMemcpyKind memcpyType =
deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
if ((copyInput && mEngine->bindingIsInput(i)) ||
(!copyInput && !mEngine->bindingIsInput(i))) {
if (async)
CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
else
CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
}
}
}
std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
std::vector<std::unique_ptr<ManagedBuffer>>
mManagedBuffers; //!< The vector of pointers to managed buffers
std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed
//! for engine execution
};
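// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Runs one synchronous inference; the tensor names "input" and
// "output" are placeholders.
inline bool exampleInferOnce(std::shared_ptr<nvinfer1::ICudaEngine> engine,
                             nvinfer1::IExecutionContext& context) {
  BufferManager buffers(engine, /*batchSize=*/0, &context);
  float* hostInput = static_cast<float*>(buffers.getHostBuffer("input"));
  if (hostInput == nullptr) {
    return false;
  }
  // ... fill hostInput with preprocessed data here ...
  buffers.copyInputToDevice();
  if (!context.executeV2(buffers.getDeviceBindings().data())) {
    return false;
  }
  buffers.copyOutputToHost();
  return buffers.getHostBuffer("output") != nullptr;
}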
} // namespace samplesCommon
#endif // TENSORRT_BUFFERS_H

View File

@@ -1,844 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_COMMON_H
#define TENSORRT_COMMON_H
// For loadLibrary
#ifdef _MSC_VER
// Needed so that the max/min definitions in windows.h do not conflict with
// std::max/min.
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX
#else
#include <dlfcn.h>
#endif
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ratio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "safeCommon.h"
using namespace nvinfer1;
using namespace plugin;
#ifdef _MSC_VER
#define FN_NAME __FUNCTION__
#else
#define FN_NAME __func__
#endif
#if defined(__aarch64__) || defined(__QNX__)
#define ENABLE_DLA_API 1
#endif
#define CHECK_RETURN_W_MSG(status, val, errMsg) \
do { \
if (!(status)) { \
sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
<< FN_NAME << "(), line " << __LINE__ << std::endl; \
return val; \
} \
} while (0)
#undef ASSERT
#define ASSERT(condition) \
do { \
if (!(condition)) { \
sample::gLogError << "Assertion failure: " << #condition << std::endl; \
abort(); \
} \
} while (0)
#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A * t)>
template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
auto deleter = [](T* t) { t->destroy(); };
return std::unique_ptr<T, decltype(deleter)>{static_cast<T*>(t), deleter};
}
constexpr long double operator"" _GiB(long double val) {
return val * (1 << 30);
}
constexpr long double operator"" _MiB(long double val) {
return val * (1 << 20);
}
constexpr long double operator"" _KiB(long double val) {
return val * (1 << 10);
}
// These are necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
// Since the return type is signed, -1_GiB will work as expected.
constexpr long long int operator"" _GiB(unsigned long long val) {
return val * (1 << 30);
}
constexpr long long int operator"" _MiB(unsigned long long val) {
return val * (1 << 20);
}
constexpr long long int operator"" _KiB(unsigned long long val) {
return val * (1 << 10);
}
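// Usage sketch (added for illustration; not part of the original NVIDIA
// sample): the literals keep byte sizes readable.
inline void exampleSetWorkspaceSize(nvinfer1::IBuilderConfig& config) {
  config.setMaxWorkspaceSize(256_MiB);  // instead of 268435456
}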
struct SimpleProfiler : public nvinfer1::IProfiler {
struct Record {
float time{0};
int count{0};
};
virtual void reportLayerTime(const char* layerName, float ms) noexcept {
mProfile[layerName].count++;
mProfile[layerName].time += ms;
if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) ==
mLayerNames.end()) {
mLayerNames.push_back(layerName);
}
}
SimpleProfiler(const char* name,
const std::vector<SimpleProfiler>& srcProfilers =
std::vector<SimpleProfiler>())
: mName(name) {
for (const auto& srcProfiler : srcProfilers) {
for (const auto& rec : srcProfiler.mProfile) {
auto it = mProfile.find(rec.first);
if (it == mProfile.end()) {
mProfile.insert(rec);
} else {
it->second.time += rec.second.time;
it->second.count += rec.second.count;
}
}
}
}
friend std::ostream& operator<<(std::ostream& out,
const SimpleProfiler& value) {
out << "========== " << value.mName << " profile ==========" << std::endl;
float totalTime = 0;
std::string layerNameStr = "TensorRT layer name";
int maxLayerNameLength =
std::max(static_cast<int>(layerNameStr.size()), 70);
for (const auto& elem : value.mProfile) {
totalTime += elem.second.time;
maxLayerNameLength =
std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
}
auto old_settings = out.flags();
auto old_precision = out.precision();
// Output header
{
out << std::setw(maxLayerNameLength) << layerNameStr << " ";
out << std::setw(12) << "Runtime, "
<< "%"
<< " ";
out << std::setw(12) << "Invocations"
<< " ";
out << std::setw(12) << "Runtime, ms" << std::endl;
}
for (size_t i = 0; i < value.mLayerNames.size(); i++) {
const std::string layerName = value.mLayerNames[i];
auto elem = value.mProfile.at(layerName);
out << std::setw(maxLayerNameLength) << layerName << " ";
out << std::setw(12) << std::fixed << std::setprecision(1)
<< (elem.time * 100.0F / totalTime) << "%"
<< " ";
out << std::setw(12) << elem.count << " ";
out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time
<< std::endl;
}
out.flags(old_settings);
out.precision(old_precision);
out << "========== " << value.mName << " total runtime = " << totalTime
<< " ms ==========" << std::endl;
return out;
}
private:
std::string mName;
std::vector<std::string> mLayerNames;
std::map<std::string, Record> mProfile;
};
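// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Attach the profiler, run a few synchronous inferences, then stream
// the per-layer report. The profiler must stay alive while the context uses it.
inline void exampleProfileContext(nvinfer1::IExecutionContext& context,
                                  void* const* bindings,
                                  SimpleProfiler& profiler,
                                  int32_t runs = 10) {
  context.setProfiler(&profiler);
  for (int32_t i = 0; i < runs; ++i) {
    context.executeV2(bindings);  // reportLayerTime() fires after each run
  }
  sample::gLogInfo << profiler << std::endl;
}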
//! Locate path to file, given its filename or filepath suffix and possible dirs
//! it might lie in.
//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a
//! file path.
inline std::string locateFile(const std::string& filepathSuffix,
const std::vector<std::string>& directories,
bool reportError = true) {
const int MAX_DEPTH{10};
bool found{false};
std::string filepath;
for (auto& dir : directories) {
if (!dir.empty() && dir.back() != '/') {
#ifdef _MSC_VER
filepath = dir + "\\" + filepathSuffix;
#else
filepath = dir + "/" + filepathSuffix;
#endif
} else {
filepath = dir + filepathSuffix;
}
for (int i = 0; i < MAX_DEPTH && !found; i++) {
const std::ifstream checkFile(filepath);
found = checkFile.is_open();
if (found) {
break;
}
filepath = "../" + filepath; // Try again in parent dir
}
if (found) {
break;
}
filepath.clear();
}
// Could not find the file
if (filepath.empty()) {
const std::string dirList = std::accumulate(
directories.begin() + 1, directories.end(), directories.front(),
[](const std::string& a, const std::string& b) {
return a + "\n\t" + b;
});
std::cout << "Could not find " << filepathSuffix
<< " in data directories:\n\t" << dirList << std::endl;
if (reportError) {
std::cout << "&&&& FAILED" << std::endl;
exit(EXIT_FAILURE);
}
}
return filepath;
}
inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH,
int inW) {
std::ifstream infile(fileName, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
std::string magic, h, w, max;
infile >> magic >> h >> w >> max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(buffer), inH * inW);
}
namespace samplesCommon {
// Swaps endianness of an integral type.
template <typename T,
typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T swapEndianness(const T& value) {
uint8_t bytes[sizeof(T)];
for (int i = 0; i < static_cast<int>(sizeof(T)); ++i) {
bytes[sizeof(T) - 1 - i] = *(reinterpret_cast<const uint8_t*>(&value) + i);
}
return *reinterpret_cast<T*>(bytes);
}
class HostMemory {
public:
HostMemory() = delete;
virtual void* data() const noexcept { return mData; }
virtual std::size_t size() const noexcept { return mSize; }
virtual DataType type() const noexcept { return mType; }
virtual ~HostMemory() {}
protected:
HostMemory(std::size_t size, DataType type)
: mData{nullptr}, mSize(size), mType(type) {}
void* mData;
std::size_t mSize;
DataType mType;
};
template <typename ElemType, DataType dataType>
class TypedHostMemory : public HostMemory {
public:
explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) {
mData = new ElemType[size];
};
~TypedHostMemory() noexcept { delete[](ElemType*) mData; }
ElemType* raw() noexcept { return static_cast<ElemType*>(data()); }
};
using FloatMemory = TypedHostMemory<float, DataType::kFLOAT>;
using HalfMemory = TypedHostMemory<uint16_t, DataType::kHALF>;
using ByteMemory = TypedHostMemory<uint8_t, DataType::kINT8>;
inline void* safeCudaMalloc(size_t memSize) {
void* deviceMem;
CHECK(cudaMalloc(&deviceMem, memSize));
if (deviceMem == nullptr) {
std::cerr << "Out of memory" << std::endl;
exit(1);
}
return deviceMem;
}
inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); }
struct InferDeleter {
template <typename T> void operator()(T* obj) const { delete obj; }
};
template <typename T> using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;
static auto StreamDeleter = [](cudaStream_t* pStream) {
if (pStream) {
cudaStreamDestroy(*pStream);
delete pStream;
}
};
inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream() {
std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(
new cudaStream_t, StreamDeleter);
if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) !=
cudaSuccess) {
pStream.reset(nullptr);
}
return pStream;
}
//! Return vector of indices that puts magnitudes of sequence in descending
//! order.
template <class Iter>
std::vector<size_t> argMagnitudeSort(Iter begin, Iter end) {
std::vector<size_t> indices(end - begin);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) {
return std::abs(begin[j]) < std::abs(begin[i]);
});
return indices;
}
inline bool readReferenceFile(const std::string& fileName,
std::vector<std::string>& refVector) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR: readReferenceFile: Attempting to read from a file "
"that is not open."
<< std::endl;
return false;
}
std::string line;
while (std::getline(infile, line)) {
if (line.empty())
continue;
refVector.push_back(line);
}
infile.close();
return true;
}
template <typename T>
std::vector<std::string> classify(const std::vector<std::string>& refVector,
const std::vector<T>& output,
const size_t topK) {
const auto inds =
samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
std::vector<std::string> result;
result.reserve(topK);
for (size_t k = 0; k < topK; ++k) {
result.push_back(refVector[inds[k]]);
}
return result;
}
// Returns indices of highest K magnitudes in v.
template <typename T>
std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k) {
std::vector<size_t> indices =
samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
indices.resize(k);
return indices;
}
template <typename T>
bool readASCIIFile(const std::string& fileName, const size_t size,
std::vector<T>& out) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR readASCIIFile: Attempting to read from a file that is "
"not open."
<< std::endl;
return false;
}
out.clear();
out.reserve(size);
out.assign(std::istream_iterator<T>(infile), std::istream_iterator<T>());
infile.close();
return true;
}
template <typename T>
bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in) {
std::ofstream outfile(fileName);
if (!outfile.is_open()) {
std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is "
"not open."
<< std::endl;
return false;
}
for (auto fn : in) {
outfile << fn << "\n";
}
outfile.close();
return true;
}
inline void print_version() {
std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "."
<< NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "."
<< NV_TENSORRT_BUILD << std::endl;
}
inline std::string getFileType(const std::string& filepath) {
return filepath.substr(filepath.find_last_of(".") + 1);
}
inline std::string toLower(const std::string& inp) {
std::string out = inp;
std::transform(out.begin(), out.end(), out.begin(), ::tolower);
return out;
}
inline float getMaxValue(const float* buffer, int64_t size) {
assert(buffer != nullptr);
assert(size > 0);
return *std::max_element(buffer, buffer + size);
}
// Ensures that every tensor used by a network has a dynamic range set.
//
// All tensors in a network must have a dynamic range specified if a calibrator
// is not used.
// This function is just a utility to globally fill in missing scales and
// zero-points for the entire network.
//
// If a tensor does not have a dynamic range set, it is assigned inRange or
// outRange as follows:
//
// * If the tensor is the input to a layer or output of a pooling node, its
// dynamic range is derived from inRange.
// * Otherwise its dynamic range is derived from outRange.
//
// The default parameter values are intended to demonstrate, for final layers in
// the network,
// cases where dynamic ranges are asymmetric.
//
// The default parameter values were chosen arbitrarily. Range values should be
// chosen such that we avoid underflow or overflow, and should be non-zero to
// avoid a uniform zero-scale tensor.
inline void setAllDynamicRanges(INetworkDefinition* network,
float inRange = 2.0f, float outRange = 4.0f) {
// Ensure that all layer inputs have a scale.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbInputs(); j++) {
ITensor* input{layer->getInput(j)};
// Optional inputs are nullptr here and are from RNN layers.
if (input != nullptr && !input->dynamicRangeIsSet()) {
ASSERT(input->setDynamicRange(-inRange, inRange));
}
}
}
// Ensure that all layer outputs have a scale.
// Tensors that are also inputs to layers are ignored here
// since the previous loop nest assigned scales to them.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbOutputs(); j++) {
ITensor* output{layer->getOutput(j)};
// Optional outputs are nullptr here and are from RNN layers.
if (output != nullptr && !output->dynamicRangeIsSet()) {
// Pooling must have the same input and output scales.
if (layer->getType() == LayerType::kPOOLING) {
ASSERT(output->setDynamicRange(-inRange, inRange));
} else {
ASSERT(output->setDynamicRange(-outRange, outRange));
}
}
}
}
}
inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
INetworkDefinition* n) {
// Set dummy per-tensor dynamic range if Int8 mode is requested.
if (c->getFlag(BuilderFlag::kINT8)) {
sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
"per-tensor dynamic range. Int8 accuracy is not "
"guaranteed."
<< std::endl;
setAllDynamicRanges(n);
}
}
inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
bool allowGPUFallback = true) {
if (useDLACore >= 0) {
if (builder->getNbDLACores() == 0) {
std::cerr << "Trying to use DLA core " << useDLACore
<< " on a platform that doesn't have any DLA cores"
<< std::endl;
      assert(
          "Error: use DLA core on a platform that doesn't have any DLA cores" &&
          false);
}
if (allowGPUFallback) {
config->setFlag(BuilderFlag::kGPU_FALLBACK);
}
if (!config->getFlag(BuilderFlag::kINT8)) {
// User has not requested INT8 Mode.
// By default run in FP16 mode. FP32 mode is not permitted.
config->setFlag(BuilderFlag::kFP16);
}
config->setDefaultDeviceType(DeviceType::kDLA);
config->setDLACore(useDLACore);
}
}
inline int32_t parseDLA(int32_t argc, char** argv) {
for (int32_t i = 1; i < argc; i++) {
if (strncmp(argv[i], "--useDLACore=", 13) == 0) {
return std::stoi(argv[i] + 13);
}
}
return -1;
}
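// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample): forward a --useDLACore=N request from the command line into the
// builder config.
inline void exampleConfigureDLA(nvinfer1::IBuilder* builder,
                                nvinfer1::IBuilderConfig* config,
                                int32_t argc, char** argv) {
  const int32_t dlaCore = parseDLA(argc, argv);  // -1 when DLA not requested
  enableDLA(builder, config, dlaCore);           // no-op for dlaCore < 0
}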
inline uint32_t getElementSize(nvinfer1::DataType t) noexcept {
switch (t) {
case nvinfer1::DataType::kINT32:
return 4;
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
inline int64_t volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
}
template <int C, int H, int W> struct PPM {
std::string magic, fileName;
int h, w, max;
uint8_t buffer[C * H * W];
};
// vPPM (variable-sized PPM) struct whose dimensions are determined at runtime.
struct vPPM {
std::string magic, fileName;
int h, w, max;
std::vector<uint8_t> buffer;
};
struct BBox {
float x1, y1, x2, y2;
};
template <int C, int H, int W>
void readPPMFile(const std::string& filename,
samplesCommon::PPM<C, H, W>& ppm) {
ppm.fileName = filename;
std::ifstream infile(filename, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void readPPMFile(const std::string& filename, vPPM& ppm,
std::vector<std::string>& input_dir) {
ppm.fileName = filename;
std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
for (int i = 0; i < ppm.w * ppm.h * 3; ++i) {
ppm.buffer.push_back(0);
}
infile.read(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
template <int C, int H, int W>
void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm,
const BBox& bbox) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1);
const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1);
const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1);
const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1);
for (int x = x1; x <= x2; ++x) {
// bbox top border
ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
}
for (int y = y1; y <= y2; ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + x1) * 3] = 255;
ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + x2) * 3] = 255;
ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
}
outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm,
std::vector<BBox>& dets) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
for (auto bbox : dets) {
for (int x = int(bbox.x1); x < int(bbox.x2); ++x) {
// bbox top border
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0;
}
for (int y = int(bbox.y1); y < int(bbox.y2); ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0;
}
}
outfile.write(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
class TimerBase {
public:
virtual void start() {}
virtual void stop() {}
float microseconds() const noexcept { return mMs * 1000.f; }
float milliseconds() const noexcept { return mMs; }
float seconds() const noexcept { return mMs / 1000.f; }
void reset() noexcept { mMs = 0.f; }
protected:
float mMs{0.0f};
};
class GpuTimer : public TimerBase {
public:
explicit GpuTimer(cudaStream_t stream) : mStream(stream) {
CHECK(cudaEventCreate(&mStart));
CHECK(cudaEventCreate(&mStop));
}
~GpuTimer() {
CHECK(cudaEventDestroy(mStart));
CHECK(cudaEventDestroy(mStop));
}
void start() { CHECK(cudaEventRecord(mStart, mStream)); }
void stop() {
CHECK(cudaEventRecord(mStop, mStream));
float ms{0.0f};
CHECK(cudaEventSynchronize(mStop));
CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
mMs += ms;
}
private:
cudaEvent_t mStart, mStop;
cudaStream_t mStream;
}; // class GpuTimer
template <typename Clock> class CpuTimer : public TimerBase {
public:
using clock_type = Clock;
void start() { mStart = Clock::now(); }
void stop() {
mStop = Clock::now();
mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
}
private:
std::chrono::time_point<Clock> mStart, mStop;
}; // class CpuTimer
using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
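// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Times one enqueued inference with CUDA events (GpuTimer) and wall
// clock (PreciseCpuTimer); GpuTimer::stop() synchronizes on the stop event.
inline void exampleTimeInference(nvinfer1::IExecutionContext& context,
                                 void* const* bindings, cudaStream_t stream) {
  GpuTimer gpuTimer(stream);
  PreciseCpuTimer cpuTimer;
  cpuTimer.start();
  gpuTimer.start();
  context.enqueueV2(bindings, stream, nullptr);
  gpuTimer.stop();
  cpuTimer.stop();
  sample::gLogInfo << "GPU: " << gpuTimer.milliseconds() << " ms, CPU: "
                   << cpuTimer.milliseconds() << " ms" << std::endl;
}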
inline std::vector<std::string> splitString(std::string str,
char delimiter = ',') {
std::vector<std::string> splitVect;
std::stringstream ss(str);
std::string substr;
while (ss.good()) {
getline(ss, substr, delimiter);
splitVect.emplace_back(std::move(substr));
}
return splitVect;
}
// Return m rounded up to nearest multiple of n
inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; }
inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; }
inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; }
inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; }
inline void loadLibrary(const std::string& path) {
#ifdef _MSC_VER
void* handle = LoadLibrary(path.c_str());
#else
int32_t flags{RTLD_LAZY};
#if ENABLE_ASAN
// https://github.com/google/sanitizers/issues/89
// asan doesn't handle module unloading correctly and there are no plans on
// doing
// so. In order to get proper stack traces, don't delete the shared library on
// close so that asan can resolve the symbols correctly.
flags |= RTLD_NODELETE;
#endif // ENABLE_ASAN
void* handle = dlopen(path.c_str(), flags);
#endif
if (handle == nullptr) {
#ifdef _MSC_VER
sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
sample::gLogError << "Could not load plugin library: " << path
<< ", due to: " << dlerror() << std::endl;
#endif
}
}
inline int32_t getSMVersion() {
int32_t deviceIndex = 0;
CHECK(cudaGetDevice(&deviceIndex));
int32_t major, minor;
CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
deviceIndex));
CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
deviceIndex));
return ((major << 8) | minor);
}
inline bool isSMSafe() {
const int32_t smVersion = getSMVersion();
return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 ||
smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807;
}
inline bool isDataTypeSupported(DataType dataType) {
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder) {
return false;
}
if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) ||
(dataType == DataType::kHALF && !builder->platformHasFastFp16())) {
return false;
}
return true;
}
} // namespace samplesCommon
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
os << "(";
for (int i = 0; i < dims.nbDims; ++i) {
os << (i ? ", " : "") << dims.d[i];
}
return os << ")";
}
#endif // TENSORRT_COMMON_H

View File

@@ -1,223 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "getOptions.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <set>
namespace nvinfer1 {
namespace utility {
//! Matching for TRTOptions is defined as follows:
//!
//! If A and B both have longName set, A matches B if and only if A.longName ==
//! B.longName and (A.shortName == B.shortName if both have short name set).
//!
//! If A only has shortName set and B only has longName set, then A does not
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
//! the definition of a TRTOption in the input to getOptions. As such, if the
//! definition only has shortName set, it will never be equal to a TRTOption
//! that does not have shortName set (and same for longName).
//!
//! If A and B both have shortName set but B does not have longName set, A
//! matches B if and only if A.shortName == B.shortName.
//!
//! If A has neither long or short name set, A matches B if and only if B has
//! neither long or short name set.
bool matches(const TRTOption& a, const TRTOption& b) {
if (!a.longName.empty() && !b.longName.empty()) {
if (a.shortName && b.shortName) {
return (a.longName == b.longName) && (a.shortName == b.shortName);
}
return a.longName == b.longName;
}
// If only one of them is not set, this will return false anyway.
return a.shortName == b.shortName;
}
//! getTRTOptionIndex returns the index of a TRTOption in a vector of
//! TRTOptions, -1 if not found.
int getTRTOptionIndex(const std::vector<TRTOption>& options,
const TRTOption& opt) {
for (size_t i = 0; i < options.size(); ++i) {
if (matches(opt, options[i])) {
return i;
}
}
return -1;
}
//! validateTRTOption returns a string containing an error message if the
//! option's names contain invalid characters (anything other than
//! alphanumerics, '-', or '_'), or if the names duplicate ones already seen.
//! Otherwise, it returns the empty string.
std::string validateTRTOption(const std::set<char>& seenShortNames,
const std::set<std::string>& seenLongNames,
const TRTOption& opt) {
if (opt.shortName != 0) {
if (!std::isalnum(opt.shortName)) {
      return "Short name '" + std::string(1, opt.shortName) +
             "' is non-alphanumeric";
}
if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
      return "Short name '" + std::string(1, opt.shortName) +
             "' is a duplicate";
}
}
if (!opt.longName.empty()) {
for (const char& c : opt.longName) {
if (!std::isalnum(c) && c != '-' && c != '_') {
return "Long name '" + opt.longName +
"' contains characters that are not '-', '_', or alphanumeric";
}
}
if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
return "Long name '" + opt.longName + "' is a duplicate";
}
}
return "";
}
//! validateTRTOptions returns a string containing an error message if any
//! option contains invalid characters or duplicates another option's name.
//! Otherwise, it returns the empty string.
std::string validateTRTOptions(const std::vector<TRTOption>& options) {
std::set<char> seenShortNames;
std::set<std::string> seenLongNames;
for (size_t i = 0; i < options.size(); ++i) {
const std::string errMsg =
validateTRTOption(seenShortNames, seenLongNames, options[i]);
if (!errMsg.empty()) {
return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
}
seenShortNames.insert(options[i].shortName);
seenLongNames.insert(options[i].longName);
}
return "";
}
//! parseArgs parses an argument list and returns a TRTParsedArgs with the
//! fields set accordingly. Assumes that options is validated.
//! ErrMsg will be set if:
//! - an argument is null
//! - an argument is empty
//! - an argument does not have option (i.e. "-" and "--")
//! - a short argument has more than 1 character
//! - the last argument in the list requires a value
TRTParsedArgs parseArgs(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
TRTParsedArgs parsedArgs;
parsedArgs.values.resize(options.size());
for (int i = 1; i < argc; ++i) // index of current command-line argument
{
if (argv[i] == nullptr) {
return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
}
const std::string argStr(argv[i]);
if (argStr.empty()) {
return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
}
// No starting hyphen means it is a positional argument
if (argStr[0] != '-') {
parsedArgs.positionalArgs.push_back(argStr);
continue;
}
if (argStr == "-" || argStr == "--") {
return TRTParsedArgs{"Argument does not specify an option at index " +
std::to_string(i)};
}
// If only 1 hyphen, char after is the flag.
TRTOption opt{' ', "", false, ""};
std::string value;
if (argStr[1] != '-') {
// Must only have 1 char after the hyphen
if (argStr.size() > 2) {
return TRTParsedArgs{
"Short arg contains more than 1 character at index " +
std::to_string(i)};
}
opt.shortName = argStr[1];
} else {
opt.longName = argStr.substr(2);
// We need to support --foo=bar syntax, so look for '='
const size_t eqIndex = opt.longName.find('=');
if (eqIndex < opt.longName.size()) {
value = opt.longName.substr(eqIndex + 1);
opt.longName = opt.longName.substr(0, eqIndex);
}
}
const int idx = getTRTOptionIndex(options, opt);
if (idx < 0) {
continue;
}
if (options[idx].valueRequired) {
if (!value.empty()) {
parsedArgs.values[idx].second.push_back(value);
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
continue;
}
if (i + 1 >= argc) {
return TRTParsedArgs{"Last argument requires value, but none given"};
}
const std::string nextArg(argv[i + 1]);
if (nextArg.size() >= 1 && nextArg[0] == '-') {
sample::gLogWarning << "Warning: Using '" << nextArg
<< "' as a value for '" << argStr
                            << "'. Should this be its own flag?" << std::endl;
}
parsedArgs.values[idx].second.push_back(nextArg);
i += 1; // Next argument already consumed
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
} else {
parsedArgs.values[idx].first += 1;
}
}
return parsedArgs;
}
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
const std::string errMsg = validateTRTOptions(options);
if (!errMsg.empty()) {
return TRTParsedArgs{errMsg};
}
return parseArgs(argc, argv, options);
}
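// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample): parse a "-v/--verbose" flag and a "--model <path>" value option.
inline TRTParsedArgs exampleParse(int argc, const char* const* argv) {
  const std::vector<TRTOption> options{
      {'v', "verbose", false, "enable verbose output"},
      {'m', "model", true, "path to the model file"}};
  TRTParsedArgs parsed = getOptions(argc, argv, options);
  if (!parsed.errMsg.empty()) {
    sample::gLogError << parsed.errMsg << std::endl;
  }
  return parsed;
}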
} // namespace utility
} // namespace nvinfer1

View File

@@ -1,128 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_GET_OPTIONS_H
#define TRT_GET_OPTIONS_H
#include <string>
#include <utility>
#include <vector>
namespace nvinfer1 {
namespace utility {
//! TRTOption defines a command line option. At least 1 of shortName and
//! longName must be defined.
//! If bool initialization is undefined behavior on your system, valueRequired
//! must also be explicitly defined.
//! helpText is optional.
struct TRTOption {
char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b)
std::string longName; //!< Option name in long (double hyphen) form (i.e.
//!--foo, --bar)
bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4,
//!--foo bar)
std::string helpText; //!< Text to show when printing out the command usage
};
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
//! argument list (argv).
//!
//! errMsg is a string containing an error message if any errors occurred. If it
//! is empty, no errors occurred.
//!
//! values stores a vector of pairs for each option (ordered by order in the
//! input). Each pair contains an int (the number of occurrences) and a vector
//! of strings (a list of values). The user should know which of these to use,
//! and which options required values. For non-value options, only occurrences
//! is populated. For value-required options, occurrences == # of values. Values
//! do not need to be unique.
//!
//! positionalArgs stores additional arguments that are passed in without an
//! option (these must not start with a hyphen).
struct TRTParsedArgs {
std::string errMsg;
std::vector<std::pair<int, std::vector<std::string>>> values;
std::vector<std::string> positionalArgs;
};
//! Parse the input arguments passed to main() and extract options as well as
//! positional arguments.
//!
//! Options are supposed to be passed to main() with a preceding hyphen '-'.
//!
//! If there is a single preceding hyphen, there should be exactly 1 character
//! after the hyphen, which is interpreted as the option.
//!
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
//! is interpreted as the option.
//!
//! If the option requires a value, the next argument is used as the value.
//!
//! Positional arguments must not start with a hyphen.
//!
//! If an argument requires a value, the next argument is interpreted as the
//! value, even if it is the form of a valid option (i.e. --foo --bar will store
//! "--bar" as a value for option "foo" if "foo" requires a value).
//! We also support --name=value syntax. In this case, 'value' would be used as
//! the value, NOT the next argument.
//!
//! For options:
//! { { 'a', "", false },
//! { 'b', "", false },
//! { 0, "cee", false },
//! { 'd', "", true },
//! { 'e', "", true },
//! { 'f', "foo", true } }
//!
//! ./main hello world -a -a --cee -d 12 -f 34
//! and
//! ./main hello world -a -a --cee -d 12 --foo 34
//!
//! will result in:
//!
//! TRTParsedArgs {
//! errMsg: "",
//! values: { { 2, {} },
//! { 0, {} },
//! { 1, {} },
//! { 1, {"12"} },
//! { 0, {} },
//! { 1, {"34"} } }
//! positionalArgs: {"hello", "world"},
//! }
//!
//! Non-POSIX behavior:
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
//! option must have its own hyphen prefix.
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
//! whitespace-separated from the option they are for.
//!
//! @param[in] argc The number of arguments passed to main (including the
//! file name, which is disregarded)
//! @param[in] argv The arguments passed to main (including the file name,
//! which is disregarded)
//! @param[in] options List of TRTOptions to parse
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
//! the fields.
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options);
} // namespace utility
} // namespace nvinfer1
#endif // TRT_GET_OPTIONS_H

File diff suppressed because it is too large

View File

@@ -1,38 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "logger.h"
#include "ErrorRecorder.h"
#include "logging.h"
SampleErrorRecorder gRecorder;
namespace sample {
Logger gLogger{Logger::Severity::kINFO};
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
void setReportableSeverity(Logger::Severity severity) {
gLogger.setReportableSeverity(severity);
gLogVerbose.setReportableSeverity(severity);
gLogInfo.setReportableSeverity(severity);
gLogWarning.setReportableSeverity(severity);
gLogError.setReportableSeverity(severity);
gLogFatal.setReportableSeverity(severity);
}
} // namespace sample

View File

@@ -1,35 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include "logging.h"
class SampleErrorRecorder;
extern SampleErrorRecorder gRecorder;
namespace sample {
extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;
void setReportableSeverity(Logger::Severity severity);
} // namespace sample
#endif // LOGGER_H

View File

@@ -1,573 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include "sampleOptions.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ostream>
#include <sstream>
#include <string>
namespace sample {
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf {
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
: mOutput(other.mOutput), mPrefix(other.mPrefix),
mShouldLog(other.mShouldLog) {}
LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
LogStreamConsumerBuffer() = delete;
LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
~LogStreamConsumerBuffer() override {
// std::streambuf::pbase() gives a pointer to the beginning of the buffered
// part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the
// output sequence
// if the pointer to the beginning is not equal to the pointer to the
// current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr()) {
putOutput();
}
}
//!
//! synchronizes the stream buffer and returns 0 on success
//! synchronizing the stream buffer consists of inserting the buffer contents
//! into the stream,
//! resetting the buffer and flushing the stream
//!
int32_t sync() override {
putOutput();
return 0;
}
void putOutput() {
if (mShouldLog) {
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
mOutput << "[";
mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
<< "/";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
<< "-";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents pre-appended by the appropriate prefix into
// the stream
mOutput << mPrefix << str();
}
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog{};
}; // class LogStreamConsumerBuffer
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
//! std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase {
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mBuffer(stream, prefix, shouldLog) {}
protected:
std::mutex mLogMutex;
LogStreamConsumerBuffer mBuffer;
}; // class LogStreamConsumerBase
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when
//! logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the
//! LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to
//! std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from
//! being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
public:
//!
//! \brief Creates a LogStreamConsumer which logs messages with level
//! severity.
//! Reportable severity determines if the messages are severe enough to be
//! logged.
//!
LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
nvinfer1::ILogger::Severity severity)
: LogStreamConsumerBase(severityOstream(severity),
severityPrefix(severity),
severity <= reportableSeverity),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
LogStreamConsumer(LogStreamConsumer&& other) noexcept
: LogStreamConsumerBase(severityOstream(other.mSeverity),
severityPrefix(other.mSeverity),
other.mShouldLog),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
LogStreamConsumer(const LogStreamConsumer& other) = delete;
LogStreamConsumer() = delete;
~LogStreamConsumer() = default;
LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
void setReportableSeverity(Severity reportableSeverity) {
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
std::mutex& getMutex() { return mLogMutex; }
bool getShouldLog() const { return mShouldLog; }
private:
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
bool mShouldLog;
Severity mSeverity;
}; // class LogStreamConsumer
template <typename T>
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << obj;
}
return logger;
}
//!
//! Special handling std::endl
//!
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
std::ostream& (*f)(std::ostream&)) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << f;
}
return logger;
}
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
const nvinfer1::Dims& dims) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
}
return logger;
}
//!
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and
//! samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or
//! internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to
//! emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output
//! is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results
//! to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
//! duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits
//! directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between
//! messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to
//! access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger
//! implementation a member of the Logger
//! object.
//!
class Logger : public nvinfer1::ILogger {
public:
explicit Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity) {}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult {
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger
//! associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the
//! logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will
//! eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) noexcept override {
LogStreamConsumer(mReportableSeverity, severity)
<< "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of
//! this level or higher.
//!
void setReportableSeverity(Severity severity) noexcept {
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print
//! test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom
//! that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom {
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started), mName(name), mCmdline(cmdline) {}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting
//! with
//! "TensorRT" and containing dot-separated strings
//! containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name,
const std::string& cmdline) {
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an
//! array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, int32_t argc,
char const* const* argv) {
// Append TensorRT version as info
const std::string vname =
name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
auto cmdline = genCmdlineString(argc, argv);
return defineTest(vname, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom) {
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of
//! TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(TestAtom const& testAtom, TestResult result) {
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int32_t reportPass(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int32_t reportFail(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int32_t reportWaive(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int32_t reportTest(TestAtom const& testAtom, bool pass) {
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const { return mReportableSeverity; }
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the
//! given severity
//!
static const char* severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message
//! with the given result
//!
static const char* testResultString(TestResult result) {
switch (result) {
case TestResult::kRUNNING:
return "RUNNING";
case TestResult::kPASSED:
return "PASSED";
case TestResult::kFAILED:
return "FAILED";
case TestResult::kWAIVED:
return "WAIVED";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the
//! given severity
//!
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(TestAtom const& testAtom, TestResult result) {
severityOstream(Severity::kINFO)
<< "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int32_t argc, char const* const* argv) {
std::stringstream ss;
for (int32_t i = 0; i < argc; i++) {
if (i > 0) {
ss << " ";
}
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
}; // class Logger
namespace {
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINTERNAL_ERROR
//! ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(),
Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
} // namespace sample
#endif // TENSORRT_LOGGING_H
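
A minimal usage sketch for the logging utilities above, assuming the program links against the logger globals declared in logger.h (earlier in this diff); the message strings are illustrative.

#include "logger.h" // declares sample::gLogger and the gLog* stream consumers

int main() {
  // Only messages at or above kINFO are emitted after this call.
  sample::setReportableSeverity(sample::Severity::kINFO);

  sample::gLogInfo << "starting up" << std::endl;
  sample::gLogVerbose << "filtered out at kINFO" << std::endl;
  sample::gLogWarning << "shown on stderr with a [W] prefix" << std::endl;

  // The same logger can be handed to TensorRT when creating a builder/runtime:
  //   nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger());
  return 0;
}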

View File

@@ -1,126 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PARSER_ONNX_CONFIG_H
#define PARSER_ONNX_CONFIG_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
#include "NvOnnxParser.h"
#define ONNX_DEBUG 1
/**
* \class ParserOnnxConfig
* \brief Configuration Manager Class Concrete Implementation
*
* \note:
*
*/
using namespace std;
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
protected:
string mModelFilename{};
string mTextFilename{};
string mFullTextFilename{};
nvinfer1::DataType mModelDtype;
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
bool mPrintLayercInfo;
public:
ParserOnnxConfig()
: mModelDtype(nvinfer1::DataType::kFLOAT),
mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
mPrintLayercInfo(false) {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~ParserOnnxConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
mModelDtype = modelDtype;
}
virtual nvinfer1::DataType getModelDtype() const noexcept {
return mModelDtype;
}
virtual const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
virtual void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = string(onnxFilename);
}
virtual nvonnxparser::IOnnxConfig::Verbosity
getVerbosityLevel() const noexcept {
return mVerbosity;
}
virtual void addVerbosity() noexcept { ++mVerbosity; }
virtual void reduceVerbosity() noexcept { --mVerbosity; }
virtual void
setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
mVerbosity = verbosity;
}
virtual const char* getTextFileName() const noexcept {
return mTextFilename.c_str();
}
virtual void setTextFileName(const char* textFilename) noexcept {
mTextFilename = string(textFilename);
}
virtual const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = string(fullTextFilename);
}
virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
virtual void setPrintLayerInfo(bool src) noexcept {
mPrintLayercInfo = src;
  } //!< set the boolean variable corresponding to the Layer Info, see
//! getPrintLayerInfo()
virtual bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
virtual void destroy() noexcept { delete this; }
}; // class ParserOnnxConfig
#endif
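
A short, hypothetical sketch of how the ParserOnnxConfig class above is meant to be driven; the model path is a placeholder and the header name is assumed from the include guard.

#include <iostream>
#include "parserOnnxConfig.h" // assumed header name for the class above

int main() {
  // The destructor is protected, so the object is heap-allocated and
  // released through destroy().
  ParserOnnxConfig* config = new ParserOnnxConfig();
  config->setModelFileName("model.onnx"); // placeholder path
  config->setModelDtype(nvinfer1::DataType::kHALF);
  config->setPrintLayerInfo(true);
  config->addVerbosity(); // bump verbosity one level above the default

  std::cout << "parsing " << config->getModelFileName()
            << (config->getPrintLayerInfo() ? " with" : " without")
            << " layer info" << std::endl;
  config->destroy();
  return 0;
}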

View File

@@ -1,65 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_SAFE_COMMON_H
#define TENSORRT_SAFE_COMMON_H
#include "NvInferRuntimeCommon.h"
#include <cstdlib>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#define CHECK(status) \
do { \
auto ret = (status); \
if (ret != 0) { \
std::cerr << "Cuda failure: " << ret << std::endl; \
abort(); \
} \
} while (0)
namespace samplesCommon {
template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
if (!obj) {
throw std::runtime_error("Failed to create object");
}
return std::shared_ptr<T>(obj);
}
inline uint32_t elementSize(nvinfer1::DataType t) {
switch (t) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kINT8:
return 1;
case nvinfer1::DataType::kBOOL:
return 1;
}
return 0;
}
template <typename A, typename B> inline A divUp(A x, B n) {
return (x + n - 1) / n;
}
} // namespace samplesCommon
#endif // TENSORRT_SAFE_COMMON_H
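
The helpers above are small enough to demonstrate in a few lines. This sketch (header name assumed, buffer size illustrative) shows CHECK guarding CUDA calls, plus elementSize and divUp for a launch-size computation.

#include <cuda_runtime_api.h>
#include <iostream>
#include "safeCommon.h" // assumed header name for the helpers above

int main() {
  const int64_t elemCount = 1000;
  // Bytes per element for a TensorRT data type (kHALF -> 2).
  const uint32_t elemBytes = samplesCommon::elementSize(nvinfer1::DataType::kHALF);
  // Number of 256-thread blocks needed to cover elemCount elements.
  const int64_t blocks = samplesCommon::divUp(elemCount, 256);
  std::cout << blocks << " blocks, " << elemBytes << " bytes/element" << std::endl;

  // CHECK aborts with a message on any non-zero CUDA status.
  void* devPtr{nullptr};
  CHECK(cudaMalloc(&devPtr, elemCount * elemBytes));
  CHECK(cudaFree(devPtr));
  return 0;
}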

View File

@@ -1,251 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SampleConfig_H
#define SampleConfig_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
class SampleConfig : public nvonnxparser::IOnnxConfig {
public:
enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
private:
std::string mModelFilename;
std::string mEngineFilename;
std::string mTextFilename;
std::string mFullTextFilename;
std::string mImageFilename;
std::string mReferenceFilename;
std::string mOutputFilename;
std::string mCalibrationFilename;
std::string mTimingCacheFilename;
int64_t mLabel{-1};
int64_t mMaxBatchSize{32};
int64_t mCalibBatchSize{0};
int64_t mMaxNCalibBatch{0};
int64_t mFirstCalibBatch{0};
int64_t mUseDLACore{-1};
nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
bool mTF32{true};
Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
bool mPrintLayercInfo{false};
bool mDebugBuilder{false};
InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
uint64_t mTopK{0};
float mFailurePercentage{-1.0f};
float mTolerance{0.0f};
float mAbsTolerance{1e-5f};
public:
SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "SampleConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
void setModelDtype(const nvinfer1::DataType mdt) noexcept {
mModelDtype = mdt;
}
nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
bool getTF32() const noexcept { return mTF32; }
void setTF32(bool enabled) noexcept { mTF32 = enabled; }
const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = std::string(onnxFilename);
}
Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
void addVerbosity() noexcept { ++mVerbosity; }
void reduceVerbosity() noexcept { --mVerbosity; }
virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
const char* getEngineFileName() const noexcept {
return mEngineFilename.c_str();
}
void setEngineFileName(const char* engineFilename) noexcept {
mEngineFilename = std::string(engineFilename);
}
const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
void setTextFileName(const char* textFilename) noexcept {
mTextFilename = std::string(textFilename);
}
const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = std::string(fullTextFilename);
}
void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label
int64_t getLabel() const noexcept { return mLabel; } //!< get the Label
bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
void setPrintLayerInfo(bool b) noexcept {
mPrintLayercInfo = b;
  } //!< set the boolean variable corresponding to the Layer Info, see
//! getPrintLayerInfo()
void setMaxBatchSize(int64_t maxBatchSize) noexcept {
mMaxBatchSize = maxBatchSize;
} //!< set the Max Batch Size
int64_t getMaxBatchSize() const noexcept {
return mMaxBatchSize;
} //!< get the Max Batch Size
void setCalibBatchSize(int64_t CalibBatchSize) noexcept {
mCalibBatchSize = CalibBatchSize;
} //!< set the calibration batch size
int64_t getCalibBatchSize() const noexcept {
return mCalibBatchSize;
} //!< get calibration batch size
void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept {
mMaxNCalibBatch = MaxNCalibBatch;
} //!< set Max Number of Calibration Batches
int64_t getMaxNCalibBatch() const noexcept {
return mMaxNCalibBatch;
} //!< get the Max Number of Calibration Batches
void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept {
mFirstCalibBatch = FirstCalibBatch;
} //!< set the first calibration batch
int64_t getFirstCalibBatch() const noexcept {
return mFirstCalibBatch;
} //!< get the first calibration batch
void setUseDLACore(int64_t UseDLACore) noexcept {
mUseDLACore = UseDLACore;
} //!< set the DLA core to use
int64_t getUseDLACore() const noexcept {
return mUseDLACore;
} //!< get the DLA core to use
void setDebugBuilder() noexcept {
mDebugBuilder = true;
  } //!< enable debug info while building the engine.
bool getDebugBuilder() const noexcept {
return mDebugBuilder;
} //!< get the boolean variable, corresponding to the debug builder
const char*
  getImageFileName() const noexcept //!< get the Image file name (PPM or ASCII)
{
return mImageFilename.c_str();
}
void setImageFileName(
      const char* imageFilename) noexcept //!< set the Image file name
{
mImageFilename = std::string(imageFilename);
}
const char* getReferenceFileName() const noexcept {
return mReferenceFilename.c_str();
}
void setReferenceFileName(
const char* referenceFilename) noexcept //!< set reference file name
{
mReferenceFilename = std::string(referenceFilename);
}
void setInputDataFormat(InputDataFormat idt) noexcept {
mInputDataFormat = idt;
} //!< specifies expected data format of the image file (PPM or ASCII)
InputDataFormat getInputDataFormat() const noexcept {
return mInputDataFormat;
} //!< returns the expected data format of the image file.
const char* getOutputFileName()
const noexcept //!< specifies the file to save the results
{
return mOutputFilename.c_str();
}
void setOutputFileName(
      const char* outputFilename) noexcept //!< set the output file name
{
mOutputFilename = std::string(outputFilename);
}
const char* getCalibrationFileName() const noexcept {
return mCalibrationFilename.c_str();
} //!< specifies the file containing the list of image files for int8
//! calibration
void setCalibrationFileName(
      const char* calibrationFilename) noexcept //!< set the int 8 calibration
//! list file name
{
mCalibrationFilename = std::string(calibrationFilename);
}
uint64_t getTopK() const noexcept { return mTopK; }
void setTopK(uint64_t topK) noexcept {
mTopK = topK;
  } //!< If this option is specified, return the K top probabilities.
float getFailurePercentage() const noexcept { return mFailurePercentage; }
void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
float getTolerance() const noexcept { return mTolerance; }
void setTolerance(float t) noexcept { mTolerance = t; }
const char* getTimingCacheFilename() const noexcept {
return mTimingCacheFilename.c_str();
}
void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
mTimingCacheFilename = std::string(timingCacheFilename);
}
bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
void destroy() noexcept { delete this; }
}; // class SampleConfig
#endif
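
As with the ONNX parser config earlier in this diff, SampleConfig deletes itself through destroy(). The sketch below (assumed header name, placeholder file names) shows the typical setter/getter round trip.

#include <iostream>
#include "sampleConfig.h" // assumed header name for the class above

int main() {
  SampleConfig* cfg = new SampleConfig(); // protected dtor, freed via destroy()
  cfg->setModelFileName("model.onnx");    // placeholder paths
  cfg->setEngineFileName("model.engine");
  cfg->setMaxBatchSize(8);
  cfg->setTopK(5);
  cfg->setInputDataFormat(SampleConfig::InputDataFormat::kPPM);

  std::cout << "build " << cfg->getEngineFileName() << " from "
            << cfg->getModelFileName()
            << " (maxBatch=" << cfg->getMaxBatchSize()
            << ", topK=" << cfg->getTopK() << ")" << std::endl;
  cfg->destroy();
  return 0;
}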

View File

@@ -1,397 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_DEVICE_H
#define TRT_SAMPLE_DEVICE_H
#include <cassert>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <thread>
namespace sample {
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
if (ret != cudaSuccess) {
err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
abort();
}
}
class TrtCudaEvent;
namespace {
void cudaSleep(void* sleep) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
}
} // namespace
//!
//! \class TrtCudaStream
//! \brief Managed CUDA stream
//!
class TrtCudaStream {
public:
TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
TrtCudaStream(const TrtCudaStream&) = delete;
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
TrtCudaStream(TrtCudaStream&&) = delete;
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
cudaStream_t get() const { return mStream; }
void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
void wait(TrtCudaEvent& event);
void sleep(float* ms) {
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
}
private:
cudaStream_t mStream{};
};
//!
//! \class TrtCudaEvent
//! \brief Managed CUDA event
//!
class TrtCudaEvent {
public:
explicit TrtCudaEvent(bool blocking = true) {
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
}
TrtCudaEvent(const TrtCudaEvent&) = delete;
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
TrtCudaEvent(TrtCudaEvent&&) = delete;
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); }
cudaEvent_t get() const { return mEvent; }
void record(const TrtCudaStream& stream) {
cudaCheck(cudaEventRecord(mEvent, stream.get()));
}
void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); }
  // Returns the elapsed time in milliseconds
float operator-(const TrtCudaEvent& e) const {
float time{0};
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
return time;
}
private:
cudaEvent_t mEvent{};
};
inline void TrtCudaStream::wait(TrtCudaEvent& event) {
cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
}
//!
//! \class TrtCudaGraph
//! \brief Managed CUDA graph
//!
class TrtCudaGraph {
public:
explicit TrtCudaGraph() = default;
TrtCudaGraph(const TrtCudaGraph&) = delete;
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
TrtCudaGraph(TrtCudaGraph&&) = delete;
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
~TrtCudaGraph() {
if (mGraphExec) {
cudaGraphExecDestroy(mGraphExec);
}
}
void beginCapture(TrtCudaStream& stream) {
cudaCheck(
cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
}
bool launch(TrtCudaStream& stream) {
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
}
void endCapture(TrtCudaStream& stream) {
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
cudaCheck(cudaGraphDestroy(mGraph));
}
void endCaptureOnError(TrtCudaStream& stream) {
// There are two possibilities why stream capture would fail:
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
// (2) TRT reports a failure.
// In case (1), the returning mGraph should be nullptr.
// In case (2), the returning mGraph is not nullptr, but it should not be
// used.
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
if (ret == cudaErrorStreamCaptureInvalidated) {
assert(mGraph == nullptr);
} else {
assert(ret == cudaSuccess);
assert(mGraph != nullptr);
cudaCheck(cudaGraphDestroy(mGraph));
mGraph = nullptr;
}
// Clean up any CUDA error.
cudaGetLastError();
sample::gLogWarning << "The CUDA graph capture on the stream has failed."
<< std::endl;
}
private:
cudaGraph_t mGraph{};
cudaGraphExec_t mGraphExec{};
};
//!
//! \class TrtCudaBuffer
//! \brief Managed buffer for host and device
//!
template <typename A, typename D> class TrtCudaBuffer {
public:
TrtCudaBuffer() = default;
TrtCudaBuffer(const TrtCudaBuffer&) = delete;
TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
TrtCudaBuffer(TrtCudaBuffer&& rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
if (this != &rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
return *this;
}
~TrtCudaBuffer() { reset(); }
TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
void allocate(size_t size) {
reset();
A()(&mPtr, size);
}
void reset(void* ptr = nullptr) {
if (mPtr) {
D()(mPtr);
}
mPtr = ptr;
}
void* get() const { return mPtr; }
private:
void* mPtr{nullptr};
};
struct DeviceAllocator {
void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
};
struct DeviceDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
};
struct ManagedAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocManaged(ptr, size));
}
};
struct HostAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocHost(ptr, size));
}
};
struct HostDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
};
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
//!
//! \class MirroredBuffer
//! \brief Coupled host and device buffers
//!
class IMirroredBuffer {
public:
//!
  //! Allocate memory for the mirrored buffer given the size
//! of the allocation.
//!
virtual void allocate(size_t size) = 0;
//!
//! Get the pointer to the device side buffer.
//!
//! \return pointer to device memory or nullptr if uninitialized.
//!
virtual void* getDeviceBuffer() const = 0;
//!
//! Get the pointer to the host side buffer.
//!
//! \return pointer to host memory or nullptr if uninitialized.
//!
virtual void* getHostBuffer() const = 0;
//!
//! Copy the memory from host to device.
//!
virtual void hostToDevice(TrtCudaStream& stream) = 0;
//!
//! Copy the memory from device to host.
//!
virtual void deviceToHost(TrtCudaStream& stream) = 0;
//!
//! Interface to get the size of the memory
//!
//! \return the size of memory allocated.
//!
virtual size_t getSize() const = 0;
//!
  //! Virtual destructor declaration
//!
virtual ~IMirroredBuffer() = default;
}; // class IMirroredBuffer
//!
//! Class to have a separate memory buffer for discrete device and host
//! allocations.
//!
class DiscreteMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mHostBuffer.allocate(size);
mDeviceBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
void* getHostBuffer() const { return mHostBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
cudaMemcpyHostToDevice, stream.get()));
}
void deviceToHost(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
cudaMemcpyDeviceToHost, stream.get()));
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtHostBuffer mHostBuffer;
TrtDeviceBuffer mDeviceBuffer;
}; // class DiscreteMirroredBuffer
//!
//! Class to have a unified memory buffer for embedded devices.
//!
class UnifiedMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mBuffer.get(); }
void* getHostBuffer() const { return mBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
void deviceToHost(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtManagedBuffer mBuffer;
}; // class UnifiedMirroredBuffer
inline void setCudaDevice(int device, std::ostream& os) {
cudaCheck(cudaSetDevice(device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// clang-format off
os << "=== Device Information ===" << std::endl;
os << "Selected Device: " << properties.name << std::endl;
os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
os << "SMs: " << properties.multiProcessorCount << std::endl;
os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
<< " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
// clang-format on
}
} // namespace sample
#endif // TRT_SAMPLE_DEVICE_H
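
A small sketch of the RAII wrappers above, assuming the header compiles in its original sample context (header name assumed, sizes illustrative): it allocates a mirrored buffer, times an asynchronous host-to-device copy with two events, and prints the device summary.

#include <cstring>
#include <iostream>
#include "sampleDevice.h" // assumed header name for the wrappers above

int main() {
  sample::setCudaDevice(0, std::cout); // prints the "Device Information" block

  sample::TrtCudaStream stream;
  sample::DiscreteMirroredBuffer buffer;
  buffer.allocate(1 << 20); // 1 MiB mirrored on host and device

  std::memset(buffer.getHostBuffer(), 0, buffer.getSize());

  sample::TrtCudaEvent start;
  sample::TrtCudaEvent stop;
  start.record(stream);
  buffer.hostToDevice(stream); // async copy on the managed stream
  stop.record(stream);
  stream.synchronize();

  // operator- returns the elapsed time between the two events in milliseconds.
  std::cout << "H2D copy took " << (stop - start) << " ms" << std::endl;
  return 0;
}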

File diff suppressed because it is too large

View File

@@ -1,195 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_ENGINES_H
#define TRT_SAMPLE_ENGINES_H
#include <iostream>
#include <vector>
//#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
struct Parser {
// TrtUniquePtr<nvcaffeparser1::ICaffeParser> caffeParser;
TrtUniquePtr<nvonnxparser::IParser> onnxParser;
operator bool() const { return false || onnxParser; }
};
struct BuildEnvironment {
TrtUniquePtr<INetworkDefinition> network;
//! Parser that creates the network. Must be declared *after* network, so that
//! when
//! ~BuildEnvironment() executes, the parser is destroyed before the network
//! is destroyed.
Parser parser;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<uint8_t> engineBlob;
};
//!
//! \brief Generate a network definition for a given model
//!
//! \return Parser The parser used to initialize the network and that holds the
//! weights for the network, or an invalid
//! parser (the returned parser converts to false if tested)
//!
//! Constant input dimensions in the model must not be changed in the
//! corresponding
//! network definition, because its correctness may rely on the constants.
//!
//! \see Parser::operator bool()
//!
Parser modelToNetwork(const ModelOptions& model,
nvinfer1::INetworkDefinition& network, std::ostream& err);
//!
//! \brief Set up network and config
//!
//! \return boolean Return true if network and config were successfully set
//!
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
IBuilder& builder, INetworkDefinition& network,
IBuilderConfig& config, std::ostream& err,
std::vector<std::vector<char>>& sparseWeights);
//!
//! \brief Log refittable layers and weights of a refittable engine
//!
void dumpRefittable(nvinfer1::ICudaEngine& engine);
//!
//! \brief Load a serialized engine
//!
//! \return Pointer to the engine loaded or nullptr if the operation failed
//!
nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore,
std::ostream& err);
//!
//! \brief Save an engine into a file
//!
//! \return boolean Return true if the engine was successfully saved
//!
bool saveEngine(const nvinfer1::ICudaEngine& engine,
const std::string& fileName, std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return boolean Return true if the engine build environment was successfully
//! created
//!
bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, BuildEnvironment& env,
std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return Pointer to the engine created or nullptr if the creation failed
//!
inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys,
std::ostream& err) {
BuildEnvironment env;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
if (getEngineBuildEnv(model, build, sys, env, err)) {
engine.swap(env.engine);
}
return engine;
}
//!
//! \brief Create a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* networkToSerialized(const BuildOptions& build,
const SystemOptions& sys, IBuilder& builder,
INetworkDefinition& network,
std::ostream& err);
//!
//! \brief Transfer a model to a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* modelToSerialized(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
//!
//! \brief Serialize network and save it into a file
//!
//! \return boolean Return true if the network was successfully serialized and
//! saved
//!
bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine,
bool multiThreading);
//!
//! \brief Set tensor scales from a calibration table
//!
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
const std::vector<IOFormat>& inputFormats,
const std::vector<IOFormat>& outputFormats,
const std::string& calibrationFile);
//!
//! \brief Check if safe runtime is loaded.
//!
bool hasSafeRuntime();
//!
//! \brief Create a safe runtime object if the dynamic library is loaded.
//!
nvinfer1::safe::IRuntime*
createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
//!
//! \brief Check if consistency checker is loaded.
//!
bool hasConsistencyChecker();
//!
//! \brief Create a consistency checker object if the dynamic library is loaded.
//!
nvinfer1::consistency::IConsistencyChecker*
createConsistencyChecker(nvinfer1::ILogger& logger,
IHostMemory const* engine) noexcept;
//!
//! \brief Run consistency check on serialized engine.
//!
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
} // namespace sample
#endif // TRT_SAMPLE_ENGINES_H
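
The call pattern for the engine helpers above, as a rough sketch only: the option structs come from sampleOptions.h, whose fields are not shown in this diff, so a real run would first populate them (for example with the ONNX model path). The engine file name is a placeholder.

#include <iostream>
#include "sampleEngines.h" // assumed to pull in sampleOptions.h and sampleUtils.h

int main() {
  sample::ModelOptions model{}; // would normally carry the model path/format
  sample::BuildOptions build{}; // precision, workspace, profiles, ...
  sample::SystemOptions sys{};  // device, DLA core, plugins, ...

  auto engine = sample::getEngine(model, build, sys, std::cerr);
  if (!engine) {
    std::cerr << "engine creation failed" << std::endl;
    return 1;
  }
  if (!sample::saveEngine(*engine, "model.engine", std::cerr)) { // placeholder path
    return 1;
  }
  std::cout << "engine serialized to model.engine" << std::endl;
  return 0;
}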

View File

@@ -1,943 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <array>
#include <chrono>
#include <cuda_profiler_api.h>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <thread>
#include <utility>
#include <vector>
#if defined(__QNX__)
#include <sys/neutrino.h>
#include <sys/syspage.h>
#endif
#include "NvInfer.h"
#include "ErrorRecorder.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleEngines.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
#include "sampleUtils.h"
namespace sample {
template <class MapType, class EngineType>
bool validateTensorNames(const MapType& map, const EngineType* engine,
const int32_t endBindingIndex) {
// Check if the provided input tensor names match the input tensors of the
// engine.
// Throw an error if the provided input tensor names cannot be found because
// it implies a potential typo.
for (const auto& item : map) {
bool tensorNameFound{false};
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (engine->bindingIsInput(b) &&
engine->getBindingName(b) == item.first) {
tensorNameFound = true;
break;
}
}
if (!tensorNameFound) {
sample::gLogError
<< "Cannot find input tensor with name \"" << item.first
<< "\" in the engine bindings! "
<< "Please make sure the input tensor names are correct."
<< std::endl;
return false;
}
}
return true;
}
template <class EngineType, class ContextType> class FillBindingClosure {
private:
using InputsMap = std::unordered_map<std::string, std::string>;
using BindingsVector = std::vector<std::unique_ptr<Bindings>>;
EngineType const* engine;
ContextType const* context;
InputsMap const& inputs;
BindingsVector& bindings;
int32_t batch;
int32_t endBindingIndex;
void fillOneBinding(int32_t bindingIndex, int64_t vol) {
auto const dims = getDims(bindingIndex);
auto const name = engine->getBindingName(bindingIndex);
auto const isInput = engine->bindingIsInput(bindingIndex);
auto const dataType = engine->getBindingDataType(bindingIndex);
auto const* bindingInOutStr = isInput ? "input" : "output";
for (auto& binding : bindings) {
const auto input = inputs.find(name);
if (isInput && input != inputs.end()) {
sample::gLogInfo << "Using values loaded from " << input->second
<< " for input " << name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType,
input->second);
} else {
sample::gLogInfo << "Using random values for " << bindingInOutStr << " "
<< name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType);
}
sample::gLogInfo << "Created " << bindingInOutStr << " binding for "
<< name << " with dimensions " << dims << std::endl;
}
}
bool fillAllBindings(int32_t batch, int32_t endBindingIndex) {
if (!validateTensorNames(inputs, engine, endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --loadInputs flag."
<< std::endl;
return false;
}
for (int32_t b = 0; b < endBindingIndex; b++) {
auto const dims = getDims(b);
auto const comps = engine->getBindingComponentsPerElement(b);
auto const strides = context->getStrides(b);
int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b);
auto const vol = volume(dims, strides, vectorDimIndex, comps, batch);
fillOneBinding(b, vol);
}
return true;
}
Dims getDims(int32_t bindingIndex);
public:
FillBindingClosure(EngineType const* _engine, ContextType const* _context,
InputsMap const& _inputs, BindingsVector& _bindings,
int32_t _batch, int32_t _endBindingIndex)
: engine(_engine), context(_context), inputs(_inputs),
bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {}
bool operator()() { return fillAllBindings(batch, endBindingIndex); }
};
template <>
Dims FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>::
getDims(int32_t bindingIndex) {
return context->getBindingDimensions(bindingIndex);
}
template <>
Dims FillBindingClosure<
nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) {
return engine->getBindingDimensions(bindingIndex);
}
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference) {
int32_t device{};
cudaCheck(cudaGetDevice(&device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// Use managed memory on integrated devices when transfers are skipped
// and when it is explicitly requested on the commandline.
bool useManagedMemory{(inference.skipTransfers && properties.integrated) ||
inference.useManaged};
using FillSafeBindings =
FillBindingClosure<nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
auto* safeEngine = iEnv.safeEngine.get();
for (int32_t s = 0; s < inference.streams; ++s) {
iEnv.safeContext.emplace_back(safeEngine->createExecutionContext());
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
const int32_t nBindings = safeEngine->getNbBindings();
auto const* safeContext = iEnv.safeContext.front().get();
    // batch is set to 1 because the safe runtime only supports explicit batch.
return FillSafeBindings(iEnv.safeEngine.get(), safeContext,
inference.inputs, iEnv.bindings, 1, nBindings)();
}
using FillStdBindings =
FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>;
for (int32_t s = 0; s < inference.streams; ++s) {
auto ec = iEnv.engine->createExecutionContext();
if (ec == nullptr) {
sample::gLogError << "Unable to create execution context for stream " << s
<< "." << std::endl;
return false;
}
iEnv.context.emplace_back(ec);
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
if (iEnv.profiler) {
iEnv.context.front()->setProfiler(iEnv.profiler.get());
// Always run reportToProfiler() after enqueue launch
iEnv.context.front()->setEnqueueEmitsProfile(false);
}
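  // An engine with K optimization profiles exposes K * bindingsPerProfile
  // bindings in total; only the bindings of the first profile are used here.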
const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles();
const int32_t nBindings = iEnv.engine->getNbBindings();
const int32_t bindingsInProfile =
nOptProfiles > 0 ? nBindings / nOptProfiles : 0;
const int32_t endBindingIndex =
bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings();
if (nOptProfiles > 1) {
sample::gLogWarning << "Multiple profiles are currently not supported. "
"Running with one profile."
<< std::endl;
}
  // Make sure that the tensor names provided in command-line args actually
  // exist in the engine bindings, to avoid silent typos.
if (!validateTensorNames(inference.shapes, iEnv.engine.get(),
endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --shapes flag."
<< std::endl;
return false;
}
// Set all input dimensions before all bindings can be allocated
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (iEnv.engine->bindingIsInput(b)) {
auto dims = iEnv.context.front()->getBindingDimensions(b);
const bool isScalar = dims.nbDims == 0;
const bool isDynamicInput =
std::any_of(dims.d, dims.d + dims.nbDims,
[](int32_t dim) { return dim == -1; }) ||
iEnv.engine->isShapeBinding(b);
if (isDynamicInput) {
auto shape = inference.shapes.find(iEnv.engine->getBindingName(b));
std::vector<int32_t> staticDims;
if (shape == inference.shapes.end()) {
// If no shape is provided, set dynamic dimensions to 1.
constexpr int32_t DEFAULT_DIMENSION = 1;
if (iEnv.engine->isShapeBinding(b)) {
if (isScalar) {
staticDims.push_back(1);
} else {
staticDims.resize(dims.d[0]);
std::fill(staticDims.begin(), staticDims.end(),
DEFAULT_DIMENSION);
}
} else {
staticDims.resize(dims.nbDims);
std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
[&](int32_t dimension) {
return dimension >= 0 ? dimension
: DEFAULT_DIMENSION;
});
}
sample::gLogWarning << "Dynamic dimensions required for input: "
<< iEnv.engine->getBindingName(b)
<< ", but no shapes were provided. Automatically "
"overriding shape to: "
<< staticDims << std::endl;
} else if (inference.inputs.count(shape->first) &&
iEnv.engine->isShapeBinding(b)) {
if (isScalar || dims.nbDims == 1) {
// Load shape tensor from file.
size_t const size = isScalar ? 1 : dims.d[0];
staticDims.resize(size);
auto const& filename = inference.inputs.at(shape->first);
auto dst = reinterpret_cast<char*>(staticDims.data());
loadFromFile(filename, dst,
size * sizeof(decltype(staticDims)::value_type));
} else {
sample::gLogWarning << "Cannot load shape tensor " << shape->first
<< " from file, "
<< "ND-Shape isn't supported yet" << std::endl;
// Fallback
staticDims = shape->second;
}
} else {
staticDims = shape->second;
}
for (auto& c : iEnv.context) {
if (iEnv.engine->isShapeBinding(b)) {
if (!c->setInputShapeBinding(b, staticDims.data())) {
return false;
}
} else {
if (!c->setBindingDimensions(b, toDims(staticDims))) {
return false;
}
}
}
}
}
}
auto* engine = iEnv.engine.get();
auto const* context = iEnv.context.front().get();
int32_t const batch =
engine->hasImplicitBatchDimension() ? inference.batch : 1;
return FillStdBindings(engine, context, inference.inputs, iEnv.bindings,
batch, endBindingIndex)();
}
namespace {
#if defined(__QNX__)
using TimePoint = double;
#else
using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
#endif
TimePoint getCurrentTime() {
#if defined(__QNX__)
uint64_t const currentCycles = ClockCycles();
uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
// Return current timestamp in ms.
return static_cast<TimePoint>(currentCycles) * 1000. / cyclesPerSecond;
#else
return std::chrono::high_resolution_clock::now();
#endif
}
//!
//! \struct SyncStruct
//! \brief Threads synchronization structure
//!
struct SyncStruct {
std::mutex mutex;
TrtCudaStream mainStream;
TrtCudaEvent gpuStart{cudaEventBlockingSync};
TimePoint cpuStart{};
float sleep{};
};
struct Enqueue {
explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
: mContext(context), mBuffers(buffers) {}
nvinfer1::IExecutionContext& mContext;
void** mBuffers{};
};
//!
//! \class EnqueueImplicit
//! \brief Functor to enqueue inference with implicit batch
//!
class EnqueueImplicit : private Enqueue {
public:
explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
int32_t batch)
: Enqueue(context, buffers), mBatch(batch) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueue()"
<< std::endl;
}
return true;
}
return false;
}
private:
int32_t mBatch;
};
//!
//! \class EnqueueExplicit
//! \brief Functor to enqueue inference with explicit batch
//!
class EnqueueExplicit : private Enqueue {
public:
explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
: Enqueue(context, buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueueV2()"
<< std::endl;
}
return true;
}
return false;
}
};
//!
//! \class EnqueueGraph
//! \brief Functor to enqueue inference from CUDA Graph
//!
class EnqueueGraph {
public:
explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
TrtCudaGraph& graph)
: mGraph(graph), mContext(context) {}
bool operator()(TrtCudaStream& stream) const {
if (mGraph.launch(stream)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.reportToProfiler()) {
gLogWarning << "Failed to collect layer timing info from previous CUDA "
"graph launch"
<< std::endl;
}
return true;
}
return false;
}
TrtCudaGraph& mGraph;
nvinfer1::IExecutionContext& mContext;
};
//!
//! \class EnqueueSafe
//! \brief Functor to enqueue safe execution context
//!
class EnqueueSafe {
public:
explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
void** buffers)
: mContext(context), mBuffers(buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
return true;
}
return false;
}
nvinfer1::safe::IExecutionContext& mContext;
void** mBuffers{};
};
using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
enum class StreamType : int32_t {
kINPUT = 0,
kCOMPUTE = 1,
kOUTPUT = 2,
kNUM = 3
};
enum class EventType : int32_t {
kINPUT_S = 0,
kINPUT_E = 1,
kCOMPUTE_S = 2,
kCOMPUTE_E = 3,
kOUTPUT_S = 4,
kOUTPUT_E = 5,
kNUM = 6
};
using MultiStream =
std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
static_cast<int32_t>(EventType::kNUM)>;
using EnqueueTimes = std::array<TimePoint, 2>;
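// Each Iteration owns one stream per phase (input DMA, compute, output DMA)
// and, for every pipeline depth level, one start/end event per phase, so the
// H2D, compute, and D2H stages can be overlapped and timed independently.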
//!
//! \class Iteration
//! \brief Inference iteration and streams management
//!
template <class ContextType> class Iteration {
public:
Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
Bindings& bindings)
: mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
mContext(&context) {
for (int32_t d = 0; d < mDepth; ++d) {
for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
}
}
createEnqueueFunction(inference, context, bindings);
}
bool query(bool skipTransfers) {
if (mActive[mNext]) {
return true;
}
if (!skipTransfers) {
record(EventType::kINPUT_S, StreamType::kINPUT);
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
record(EventType::kINPUT_E, StreamType::kINPUT);
wait(EventType::kINPUT_E,
StreamType::kCOMPUTE); // Wait for input DMA before compute
}
record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
recordEnqueueTime();
if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
return false;
}
recordEnqueueTime();
record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
if (!skipTransfers) {
wait(EventType::kCOMPUTE_E,
StreamType::kOUTPUT); // Wait for compute before output DMA
record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
}
mActive[mNext] = true;
moveNext();
return true;
}
float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
if (mActive[mNext]) {
if (skipTransfers) {
getEvent(EventType::kCOMPUTE_E).synchronize();
} else {
getEvent(EventType::kOUTPUT_E).synchronize();
}
trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
mActive[mNext] = false;
return getEvent(EventType::kCOMPUTE_S) - gpuStart;
}
return 0;
}
void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
for (int32_t d = 0; d < mDepth; ++d) {
sync(cpuStart, gpuStart, trace, skipTransfers);
moveNext();
}
}
void wait(TrtCudaEvent& gpuStart) {
getStream(StreamType::kINPUT).wait(gpuStart);
}
void setInputData() {
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
}
void fetchOutputData() {
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
}
private:
void moveNext() { mNext = mDepth - 1 - mNext; }
TrtCudaStream& getStream(StreamType t) {
return mStream[static_cast<int32_t>(t)];
}
TrtCudaEvent& getEvent(EventType t) {
return *mEvents[mNext][static_cast<int32_t>(t)];
}
void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }
void recordEnqueueTime() {
mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
enqueueStart = 1 - enqueueStart;
}
TimePoint getEnqueueTime(bool start) {
return mEnqueueTimes[mNext][start ? 0 : 1];
}
void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }
InferenceTrace getTrace(const TimePoint& cpuStart,
const TrtCudaEvent& gpuStart, bool skipTransfers) {
float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_S) - gpuStart;
float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_E) - gpuStart;
float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_S) - gpuStart;
float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_E) - gpuStart;
return InferenceTrace(mStreamId,
std::chrono::duration<float, std::milli>(
getEnqueueTime(true) - cpuStart)
.count(),
std::chrono::duration<float, std::milli>(
getEnqueueTime(false) - cpuStart)
.count(),
is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
}
void createEnqueueFunction(const InferenceOptions& inference,
nvinfer1::IExecutionContext& context,
Bindings& bindings) {
if (inference.batch) {
mEnqueue = EnqueueFunction(EnqueueImplicit(
context, mBindings.getDeviceBuffers(), inference.batch));
} else {
mEnqueue = EnqueueFunction(
EnqueueExplicit(context, mBindings.getDeviceBuffers()));
}
if (inference.graph) {
TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
      // Avoid capturing initialization calls by executing the enqueue function
      // at least once before starting CUDA graph capture.
const auto ret = mEnqueue(stream);
assert(ret);
stream.synchronize();
mGraph.beginCapture(stream);
// The built TRT engine may contain operations that are not permitted
// under CUDA graph capture mode.
// When the stream is capturing, the enqueue call may return false if the
// current CUDA graph capture fails.
if (mEnqueue(stream)) {
mGraph.endCapture(stream);
mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
} else {
mGraph.endCaptureOnError(stream);
// Ensure any CUDA error has been cleaned up.
cudaCheck(cudaGetLastError());
sample::gLogWarning << "The built TensorRT engine contains operations "
"that are not permitted under "
"CUDA graph capture mode."
<< std::endl;
sample::gLogWarning << "The specified --useCudaGraph flag has been "
"ignored. The inference will be "
"launched without using CUDA graph launch."
<< std::endl;
}
}
}
void createEnqueueFunction(const InferenceOptions&,
nvinfer1::safe::IExecutionContext& context,
Bindings&) {
mEnqueue =
EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
}
Bindings& mBindings;
TrtCudaGraph mGraph;
EnqueueFunction mEnqueue;
int32_t mStreamId{0};
int32_t mNext{0};
int32_t mDepth{2}; // default to double buffer to hide DMA transfers
std::vector<bool> mActive;
MultiStream mStream;
std::vector<MultiEvent> mEvents;
int32_t enqueueStart{0};
std::vector<EnqueueTimes> mEnqueueTimes;
ContextType* mContext{nullptr};
};
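//!
//! \brief Run the query/sync loop over all inference streams. The loop keeps
//! running until both the requested iteration count and maxDurationMs have
//! been reached; iterations completed during the warmup period are counted in
//! skip and re-run.
//!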
template <class ContextType>
bool inferenceLoop(
std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
bool skipTransfers, float idleMs) {
float durationMs = 0;
int32_t skip = 0;
for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
++i) {
for (auto& s : iStreams) {
if (!s->query(skipTransfers)) {
return false;
}
}
for (auto& s : iStreams) {
durationMs = std::max(durationMs,
s->sync(cpuStart, gpuStart, trace, skipTransfers));
}
if (durationMs < warmupMs) // Warming up
{
if (durationMs) // Skip complete iterations
{
++skip;
}
continue;
}
if (idleMs != 0.F) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(idleMs));
}
}
for (auto& s : iStreams) {
s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
}
return true;
}
template <class ContextType>
void inferenceExecution(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
const int32_t threadIdx, const int32_t streamsPerThread,
int32_t device, std::vector<InferenceTrace>& trace) {
float warmupMs = inference.warmup;
float durationMs = inference.duration * 1000.F + warmupMs;
cudaCheck(cudaSetDevice(device));
std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;
for (int32_t s = 0; s < streamsPerThread; ++s) {
const int32_t streamId{threadIdx * streamsPerThread + s};
auto* iteration = new Iteration<ContextType>(
streamId, inference, *iEnv.template getContext<ContextType>(streamId),
*iEnv.bindings[streamId]);
if (inference.skipTransfers) {
iteration->setInputData();
}
iStreams.emplace_back(iteration);
}
for (auto& s : iStreams) {
s->wait(sync.gpuStart);
}
std::vector<InferenceTrace> localTrace;
if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
inference.iterations, durationMs, warmupMs, localTrace,
inference.skipTransfers, inference.idle)) {
iEnv.error = true;
}
if (inference.skipTransfers) {
for (auto& s : iStreams) {
s->fetchOutputData();
}
}
sync.mutex.lock();
trace.insert(trace.end(), localTrace.begin(), localTrace.end());
sync.mutex.unlock();
}
inline std::thread makeThread(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
int32_t threadIdx, int32_t streamsPerThread,
int32_t device,
std::vector<InferenceTrace>& trace) {
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
} // namespace
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace) {
cudaCheck(cudaProfilerStart());
trace.resize(0);
SyncStruct sync;
sync.sleep = inference.sleep;
sync.mainStream.sleep(&sync.sleep);
sync.cpuStart = getCurrentTime();
sync.gpuStart.record(sync.mainStream);
// When multiple streams are used, trtexec can run inference in two modes:
  // (1) if inference.threads is true, then run each stream on its own thread.
// (2) if inference.threads is false, then run all streams on the same thread.
const int32_t numThreads = inference.threads ? inference.streams : 1;
const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;
std::vector<std::thread> threads;
for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
streamsPerThread, device, trace));
}
for (auto& th : threads) {
th.join();
}
cudaCheck(cudaProfilerStop());
auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
return a.h2dStart < b.h2dStart;
};
std::sort(trace.begin(), trace.end(), cmpTrace);
return !iEnv.error;
}
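// Usage sketch (illustration only, assuming 'device' comes from SystemOptions
// and 'reporting' from ReportingOptions): a typical call order once the engine
// has been deserialized into iEnv.
//
//   std::vector<InferenceTrace> trace;
//   if (setUpInference(iEnv, inference) &&
//       runInference(inference, iEnv, device, trace)) {
//     printPerformanceReport(trace, reporting, inference.warmup,
//                            inference.batch, sample::gLogInfo,
//                            sample::gLogWarning, sample::gLogVerbose);
//   }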
namespace {
size_t reportGpuMemory() {
static size_t prevFree{0};
size_t free{0};
size_t total{0};
size_t newlyAllocated{0};
cudaCheck(cudaMemGetInfo(&free, &total));
sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
if (prevFree != 0) {
newlyAllocated = (prevFree - free);
sample::gLogInfo << ", newly allocated GPU memory = "
<< newlyAllocated / 1024.0_MiB << " GiB";
}
sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
<< std::endl;
prevFree = free;
return newlyAllocated;
}
} // namespace
//! Returns true if deserialization is slower than expected or fails.
bool timeDeserialize(InferenceEnvironment& iEnv) {
constexpr int32_t kNB_ITERS{20};
std::unique_ptr<IRuntime> rt{
createInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<ICudaEngine> engine;
std::unique_ptr<safe::IRuntime> safeRT{
sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<safe::ICudaEngine> safeEngine;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
safeRT->setErrorRecorder(&gRecorder);
}
auto timeDeserializeFn = [&]() -> float {
bool deserializeOK{false};
engine.reset(nullptr);
safeEngine.reset(nullptr);
auto startClock = std::chrono::high_resolution_clock::now();
if (iEnv.safe) {
safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size()));
deserializeOK = (safeEngine != nullptr);
} else {
engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size(), nullptr));
deserializeOK = (engine != nullptr);
}
auto endClock = std::chrono::high_resolution_clock::now();
// return NAN if deserialization failed.
return deserializeOK
? std::chrono::duration<float, std::milli>(endClock - startClock)
.count()
: NAN;
};
  // Warm up the caches to make sure that cache thrashing isn't throwing off
  // the results.
{
sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
for (int32_t i = 0, e = 2; i < e; ++i) {
timeDeserializeFn();
}
}
sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
float const first = timeDeserializeFn();
  // Check if the first deserialization succeeded.
if (std::isnan(first)) {
sample::gLogError << "Engine deserialization failed." << std::endl;
return true;
}
sample::gLogInfo << "First deserialization time = " << first
<< " milliseconds" << std::endl;
// Record initial gpu memory state.
reportGpuMemory();
float totalTime{0.F};
for (int32_t i = 0; i < kNB_ITERS; ++i) {
totalTime += timeDeserializeFn();
}
const auto averageTime = totalTime / kNB_ITERS;
  // reportGpuMemory sometimes reports zero after a single deserialization of
  // a small engine, so use the size of memory for all the iterations.
const auto totalEngineSizeGpu = reportGpuMemory();
sample::gLogInfo << "Total deserialization time = " << totalTime
<< " milliseconds in " << kNB_ITERS
<< " iterations, average time = " << averageTime
<< " milliseconds, first time = " << first
<< " milliseconds." << std::endl;
sample::gLogInfo << "Deserialization Bandwidth = "
<< 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
<< std::endl;
  // If the first deserialization is more than tolerance slower than
  // the average deserialization, return true, which means an error occurred.
  // The tolerance is set to 2x since the deserialization time is quick and
  // susceptible to caching issues causing problems in the first timing.
const auto tolerance = 2.0F;
const bool isSlowerThanExpected = first > averageTime * tolerance;
if (isSlowerThanExpected) {
sample::gLogInfo << "First deserialization time divided by average time is "
<< (first / averageTime) << ". Exceeds tolerance of "
<< tolerance << "x." << std::endl;
}
return isSlowerThanExpected;
}
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format) {
auto runtime = std::unique_ptr<IRuntime>(
createInferRuntime(sample::gLogger.getTRTLogger()));
auto inspector =
std::unique_ptr<IEngineInspector>(iEnv.engine->createEngineInspector());
if (!iEnv.context.empty()) {
inspector->setExecutionContext(iEnv.context.front().get());
}
std::string result = inspector->getEngineInformation(format);
return result;
}
} // namespace sample

View File

@@ -1,88 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_INFERENCE_H
#define TRT_SAMPLE_INFERENCE_H
#include "sampleReporting.h"
#include "sampleUtils.h"
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "NvInferSafeRuntime.h"
namespace sample {
struct InferenceEnvironment {
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<Profiler> profiler;
std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
std::vector<std::unique_ptr<Bindings>> bindings;
bool error{false};
std::vector<uint8_t> engineBlob;
bool safe{false};
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;
template <class ContextType>
inline ContextType* getContext(int32_t streamIdx);
};
template <>
inline nvinfer1::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return context[streamIdx].get();
}
template <>
inline nvinfer1::safe::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return safeContext[streamIdx].get();
}
//!
//! \brief Set up contexts and bindings for inference
//!
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference);
//!
//! \brief Deserialize the engine and time how long it takes.
//!
bool timeDeserialize(InferenceEnvironment& iEnv);
//!
//! \brief Run inference and collect timing, return false if any error hit
//! during inference
//!
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace);
//!
//! \brief Get layer information of the engine.
//!
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format);
} // namespace sample
#endif // TRT_SAMPLE_INFERENCE_H

File diff suppressed because it is too large

View File

@@ -1,311 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_OPTIONS_H
#define TRT_SAMPLE_OPTIONS_H
#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "NvInfer.h"
namespace sample {
// Build default params
constexpr int32_t maxBatchNotProvided{0};
constexpr int32_t defaultMinTiming{1};
constexpr int32_t defaultAvgTiming{8};
// System default params
constexpr int32_t defaultDevice{0};
// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};
// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr float defaultPercentile{99};
enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };
enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };
using Arguments = std::unordered_multimap<std::string, std::string>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
using ShapeRange =
std::array<std::vector<int32_t>,
nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes =
std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
struct Options {
virtual void parse(Arguments& arguments) = 0;
};
struct BaseModelOptions : public Options {
ModelFormat format{ModelFormat::kANY};
std::string model;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct UffInput : public Options {
std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
bool NHWC{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ModelOptions : public Options {
BaseModelOptions baseModel;
std::string prototxt;
std::vector<std::string> outputs;
UffInput uffInputs;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct BuildOptions : public Options {
int32_t maxBatch{maxBatchNotProvided};
double workspace{-1.0};
double dlaSRAM{-1.0};
double dlaLocalDRAM{-1.0};
double dlaGlobalDRAM{-1.0};
int32_t minTiming{defaultMinTiming};
int32_t avgTiming{defaultAvgTiming};
bool tf32{true};
bool fp16{false};
bool int8{false};
bool directIO{false};
PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
LayerPrecisions layerPrecisions;
LayerOutputTypes layerOutputTypes;
bool safe{false};
bool consistency{false};
bool restricted{false};
bool save{false};
bool load{false};
bool refittable{false};
SparsityFlag sparsity{SparsityFlag::kDISABLE};
nvinfer1::ProfilingVerbosity profilingVerbosity{
nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
std::string engine;
std::string calibration;
std::unordered_map<std::string, ShapeRange> shapes;
std::unordered_map<std::string, ShapeRange> shapesCalib;
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
nvinfer1::TacticSources enabledTactics{0};
nvinfer1::TacticSources disabledTactics{0};
TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
std::string timingCacheFile{};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SystemOptions : public Options {
int32_t device{defaultDevice};
int32_t DLACore{-1};
bool fallback{false};
std::vector<std::string> plugins;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct InferenceOptions : public Options {
int32_t batch{batchNotProvided};
int32_t iterations{defaultIterations};
int32_t streams{defaultStreams};
float warmup{defaultWarmUp};
float duration{defaultDuration};
float sleep{defaultSleep};
float idle{defaultIdle};
bool overlap{true};
bool skipTransfers{false};
bool useManaged{false};
bool spin{false};
bool threads{false};
bool graph{false};
bool skip{false};
bool rerun{false};
bool timeDeserialize{false};
bool timeRefit{false};
std::unordered_map<std::string, std::string> inputs;
std::unordered_map<std::string, std::vector<int32_t>> shapes;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ReportingOptions : public Options {
bool verbose{false};
int32_t avgs{defaultAvgRuns};
float percentile{defaultPercentile};
bool refit{false};
bool output{false};
bool profile{false};
bool layerInfo{false};
std::string exportTimes;
std::string exportOutput;
std::string exportProfile;
std::string exportLayerInfo;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SafeBuilderOptions : public Options {
std::string serialized{};
std::string onnxModelFile{};
bool help{false};
bool verbose{false};
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
bool int8{false};
std::string calibFile{};
std::vector<std::string> plugins;
bool consistency{false};
bool standard{false};
void parse(Arguments& arguments) override;
static void printHelp(std::ostream& out);
};
struct AllOptions : public Options {
ModelOptions model;
BuildOptions build;
SystemOptions system;
InferenceOptions inference;
ReportingOptions reporting;
bool helps{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
bool parseHelp(Arguments& arguments);
void helpHelp(std::ostream& out);
// Functions to print options
std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
std::ostream& operator<<(std::ostream& os, const UffInput& input);
std::ostream& operator<<(std::ostream& os, const IOFormat& format);
std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
std::ostream& operator<<(std::ostream& os, const AllOptions& options);
std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
return os;
}
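// Example: a Dims with nbDims = 4 and d = {1, 3, 224, 224} prints as
// "1x3x224x224".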
inline std::ostream& operator<<(std::ostream& os,
const nvinfer1::WeightsRole role) {
switch (role) {
case nvinfer1::WeightsRole::kKERNEL: {
os << "Kernel";
break;
}
case nvinfer1::WeightsRole::kBIAS: {
os << "Bias";
break;
}
case nvinfer1::WeightsRole::kSHIFT: {
os << "Shift";
break;
}
case nvinfer1::WeightsRole::kSCALE: {
os << "Scale";
break;
}
case nvinfer1::WeightsRole::kCONSTANT: {
os << "Constant";
break;
}
case nvinfer1::WeightsRole::kANY: {
os << "Any";
break;
}
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const std::vector<int32_t>& vec) {
for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i) {
os << (i ? "x" : "") << vec[i];
}
return os;
}
} // namespace sample
#endif // TRT_SAMPLE_OPTIONS_H

View File

@@ -1,480 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <utility>
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
using namespace nvinfer1;
namespace sample {
namespace {
//!
//! \brief Find percentile in an ascending sequence of timings
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
//!
template <typename T>
float findPercentile(float percentile,
std::vector<InferenceTime> const& timings,
T const& toFloat) {
int32_t const all = static_cast<int32_t>(timings.size());
int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
if (percentile < 0.0f || percentile > 100.0f) {
throw std::runtime_error("percentile is not in [0, 100]!");
}
return toFloat(timings[std::max(all - 1 - exclude, 0)]);
}
//!
//! \brief Find median in a sorted sequence of timings
//!
template <typename T>
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat) {
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
int32_t const m = timings.size() / 2;
if (timings.size() % 2) {
return toFloat(timings[m]);
}
return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
}
//!
//! \brief Find coefficient of variance (which is std / mean) in a sorted
//! sequence of timings given the mean
//!
template <typename T>
float findCoeffOfVariance(std::vector<InferenceTime> const& timings,
T const& toFloat, float mean) {
if (timings.empty()) {
return 0;
}
if (mean == 0.F) {
return std::numeric_limits<float>::infinity();
}
auto const metricAccumulator = [toFloat, mean](float acc,
InferenceTime const& a) {
float const diff = toFloat(a) - mean;
return acc + diff * diff;
};
float const variance =
std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) /
timings.size();
return std::sqrt(variance) / mean * 100.F;
}
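// Worked example for findCoeffOfVariance: timings of 2 ms and 4 ms give
// mean = 3 ms and variance = ((2-3)^2 + (4-3)^2) / 2 = 1, so the reported
// coefficient of variance is sqrt(1) / 3 * 100 ~= 33.3%.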
inline InferenceTime traceToTiming(const InferenceTrace& a) {
return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart),
(a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart),
(a.d2hEnd - a.h2dStart));
}
} // namespace
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float benchTimeMs, std::ostream& os) {
os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms"
<< std::endl;
os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000
<< " s" << std::endl;
}
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os) {
int32_t count = 0;
InferenceTime sum;
os << std::endl;
os << "=== Trace details ===" << std::endl;
os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
for (auto const& t : timings) {
sum += t;
if (++count == runsPerAvg) {
// clang-format off
os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
<< " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg
<< " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl;
// clang-format on
count = 0;
sum.enq = 0;
sum.h2d = 0;
sum.compute = 0;
sum.d2h = 0;
sum.e2e = 0;
}
}
}
void printMetricExplanations(std::ostream& os) {
os << std::endl;
os << "=== Explanations of the performance metrics ===" << std::endl;
os << "Total Host Walltime: the host walltime from when the first query "
"(after warmups) is enqueued to when the "
"last query is completed."
<< std::endl;
os << "GPU Compute Time: the GPU latency to execute the kernels for a query."
<< std::endl;
os << "Total GPU Compute Time: the summation of the GPU Compute Time of all "
"the queries. If this is significantly "
"shorter than Total Host Walltime, the GPU may be under-utilized "
"because of host-side overheads or data "
"transfers."
<< std::endl;
os << "Throughput: the observed throughput computed by dividing the number "
"of queries by the Total Host Walltime. "
"If this is significantly lower than the reciprocal of GPU Compute "
"Time, the GPU may be under-utilized "
"because of host-side overheads or data transfers."
<< std::endl;
os << "Enqueue Time: the host latency to enqueue a query. If this is longer "
"than GPU Compute Time, the GPU may be "
"under-utilized."
<< std::endl;
os << "H2D Latency: the latency for host-to-device data transfers for input "
"tensors of a single query."
<< std::endl;
os << "D2H Latency: the latency for device-to-host data transfers for output "
"tensors of a single query."
<< std::endl;
os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H "
"Latency. This is the latency to infer a "
"single query."
<< std::endl;
os << "End-to-End Host Latency: the duration from when the H2D of a query is "
"called to when the D2H of the same "
"query is completed, which includes the latency to wait for the "
"completion of the previous query. This is "
"the latency of a query if multiple queries are enqueued consecutively."
<< std::endl;
}
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile) {
auto const metricComparator = [metricGetter](InferenceTime const& a,
InferenceTime const& b) {
return metricGetter(a) < metricGetter(b);
};
auto const metricAccumulator = [metricGetter](float acc,
InferenceTime const& a) {
return acc + metricGetter(a);
};
std::vector<InferenceTime> newTimings = timings;
std::sort(newTimings.begin(), newTimings.end(), metricComparator);
PerformanceResult result;
result.min = metricGetter(newTimings.front());
result.max = metricGetter(newTimings.back());
result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f,
metricAccumulator) /
newTimings.size();
result.median = findMedian(newTimings, metricGetter);
result.percentile = findPercentile(percentile, newTimings, metricGetter);
result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
return result;
}
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
float percentile, int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
float const throughput = batchSize * timings.size() / walltimeMs * 1000;
auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
auto const latencyResult =
getPerformanceResult(timings, getLatency, percentile);
auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; };
auto const e2eLatencyResult =
getPerformanceResult(timings, getEndToEnd, percentile);
auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
auto const enqueueResult =
getPerformanceResult(timings, getEnqueue, percentile);
auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
auto const h2dResult = getPerformanceResult(timings, getH2d, percentile);
auto const getCompute = [](InferenceTime const& t) { return t.compute; };
auto const gpuComputeResult =
getPerformanceResult(timings, getCompute, percentile);
auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
auto const d2hResult = getPerformanceResult(timings, getD2h, percentile);
auto const toPerfString = [percentile](const PerformanceResult& r) {
std::stringstream s;
s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean
<< " ms, "
<< "median = " << r.median << " ms, percentile(" << percentile
<< "%) = " << r.percentile << " ms";
return s.str();
};
osInfo << std::endl;
osInfo << "=== Performance summary ===" << std::endl;
osInfo << "Throughput: " << throughput << " qps" << std::endl;
osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult)
<< std::endl;
osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
osInfo << "Total GPU Compute Time: "
<< gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
// Report warnings if the throughput is bound by other factors than GPU
// Compute Time.
constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
if (enqueueResult.median >
kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) {
osWarning << "* Throughput may be bound by Enqueue Time rather than GPU "
"Compute and the GPU may be under-utilized."
<< std::endl;
osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs "
"where possible) may increase the "
"throughput."
<< std::endl;
}
if (h2dResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by host-to-device transfers for "
"the inputs rather than GPU Compute and "
"the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
if (d2hResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by device-to-host transfers for "
"the outputs rather than GPU Compute "
"and the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
// Report warnings if the GPU Compute Time is unstable.
constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) {
osWarning
<< "* GPU compute time is unstable, with coefficient of variance = "
<< gpuComputeResult.coeffVar << "%." << std::endl;
osWarning << " If not already in use, locking GPU clock frequency or "
"adding --useSpinWait may improve the "
<< "stability." << std::endl;
}
// Explain what the metrics mean.
osInfo << "Explanations of the performance metrics are printed in the "
"verbose logs."
<< std::endl;
printMetricExplanations(osVerbose);
osInfo << std::endl;
}
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
const ReportingOptions& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) {
return a.computeStart >= warmupMs;
};
auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
int32_t const warmups = noWarmup - trace.begin();
float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
  // When implicit batch is used, batchSize = options.inference.batch, which is
  // parsed from --batch.
  // When explicit batch is used, batchSize = options.inference.batch = 0;
  // treat inference with explicit batch as a single query and report the
  // throughput accordingly.
batchSize = batchSize ? batchSize : 1;
printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize,
warmupMs, benchTime, osInfo);
std::vector<InferenceTime> timings(trace.size() - warmups);
std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
printTiming(timings, reporting.avgs, osInfo);
printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo,
osWarning, osVerbose);
if (!reporting.exportTimes.empty()) {
exportJSONTrace(trace, reporting.exportTimes);
}
}
//! Printed format:
//! [ value, ...]
//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end
//! h2d" : time, "start compute" : time,
//! "end compute" : time, "start d2h" : time, "end d2h" : time,
//! "h2d" : time, "compute" : time,
//! "d2h" : time, "latency" : time, "end to end" : time }
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName) {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl;
char const* sep = " ";
for (auto const& t : trace) {
InferenceTime const it(traceToTiming(t));
os << sep << "{ ";
sep = ", ";
// clang-format off
os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
<< "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
<< "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
<< "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
<< "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
<< "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep
<< "\"endToEndMs\" : " << it.e2e << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept {
if (mIterator == mLayers.end()) {
bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
mUpdatesCount += mLayers.empty() || first;
if (first) {
mIterator = mLayers.begin();
} else {
mLayers.emplace_back();
mLayers.back().name = layerName;
mIterator = mLayers.end() - 1;
}
}
mIterator->timeMs += timeMs;
++mIterator;
}
void Profiler::print(std::ostream& os) const noexcept {
std::string const nameHdr("Layer");
std::string const timeHdr(" Time (ms)");
std::string const avgHdr(" Avg. Time (ms)");
std::string const percentageHdr(" Time %");
float const totalTimeMs = getTotalTime();
auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) {
return a.name.size() < b.name.size();
};
auto const longestName =
std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
auto const nameLength =
std::max(longestName->name.size() + 1, nameHdr.size());
auto const timeLength = timeHdr.size();
auto const avgLength = avgHdr.size();
auto const percentageLength = percentageHdr.size();
os << std::endl
<< "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
<< std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr
<< std::endl;
for (auto const& p : mLayers) {
// clang-format off
os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs
<< std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100
<< std::endl;
}
{
os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
<< totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
// clang-format on
}
os << std::endl;
}
void Profiler::exportJSONProfile(std::string const& fileName) const noexcept {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl
<< " { \"count\" : " << mUpdatesCount << " }" << std::endl;
auto const totalTimeMs = getTotalTime();
for (auto const& l : mLayers) {
// clang-format off
os << ", {" << " \"name\" : \"" << l.name << "\""
", \"timeMs\" : " << l.timeMs
<< ", \"averageMs\" : " << l.timeMs / mUpdatesCount
<< ", \"percentage\" : " << l.timeMs / totalTimeMs * 100
<< " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Input Tensors:" << std::endl;
bindings.dumpInputs(context, os);
}
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Output Tensors:" << std::endl;
bindings.dumpOutputs(context, os);
}
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch) {
std::ofstream os(fileName, std::ofstream::trunc);
std::string sep = " ";
auto const output = bindings.getOutputBindings();
os << "[" << std::endl;
for (auto const& binding : output) {
// clang-format off
os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl;
sep = ", ";
os << " " << sep << "\"dimensions\" : \"";
bindings.dumpBindingDimensions(binding.second, context, os);
os << "\"" << std::endl;
os << " " << sep << "\"values\" : [ ";
bindings.dumpBindingValues(context, binding.second, os, sep, batch);
os << " ]" << std::endl << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
} // namespace sample

View File

@@ -1,211 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H
#include <functional>
#include <iostream>
#include "NvInfer.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime {
InferenceTime(float q, float i, float c, float o, float e)
: enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}
InferenceTime() = default;
InferenceTime(InferenceTime const&) = default;
InferenceTime(InferenceTime&&) = default;
InferenceTime& operator=(InferenceTime const&) = default;
InferenceTime& operator=(InferenceTime&&) = default;
~InferenceTime() = default;
float enq{0}; // Enqueue
float h2d{0}; // Host to Device
float compute{0}; // Compute
float d2h{0}; // Device to Host
float e2e{0}; // end to end
// ideal latency
float latency() const { return h2d + compute + d2h; }
};
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace {
InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
float ce, float os, float oe)
: stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}
InferenceTrace() = default;
InferenceTrace(InferenceTrace const&) = default;
InferenceTrace(InferenceTrace&&) = default;
InferenceTrace& operator=(InferenceTrace const&) = default;
InferenceTrace& operator=(InferenceTrace&&) = default;
~InferenceTrace() = default;
int32_t stream{0};
float enqStart{0};
float enqEnd{0};
float h2dStart{0};
float h2dEnd{0};
float computeStart{0};
float computeEnd{0};
float d2hStart{0};
float d2hEnd{0};
};
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
a.d2h + b.d2h, a.e2e + b.e2e);
}
inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
return a = a + b;
}
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult {
float min{0};
float max{0};
float mean{0};
float median{0};
float percentile{0};
float coeffVar{0}; // coefficient of variation
};
//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float walltime, std::ostream& os);
//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os);
//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float percentile,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile);
//!
//! \brief Print the explanations of the performance metrics printed in
//! printEpilog() function.
//!
void printMetricExplanations(std::ostream& os);
//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
ReportingOptions const& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName);
//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch);
//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile {
std::string name;
float timeMs{0};
};
//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in
//! the same order
//!
class Profiler : public nvinfer1::IProfiler {
public:
void reportLayerTime(char const* layerName, float timeMs) noexcept override;
void print(std::ostream& os) const noexcept;
//!
//! \brief Export a profile to JSON file
//!
void exportJSONProfile(std::string const& fileName) const noexcept;
private:
float getTotalTime() const noexcept {
auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
return accumulator + lp.timeMs;
};
return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime);
}
std::vector<LayerProfile> mLayers;
std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
int32_t mUpdatesCount{0};
};
} // namespace sample
#endif // TRT_SAMPLE_REPORTING_H

View File

@@ -1,494 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <unordered_map>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include "NvInfer.h"
#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"
namespace sample {
inline int dataTypeSize(nvinfer1::DataType dataType) {
switch (dataType) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
template <typename T> inline T roundUp(T m, T n) {
return ((m + n - 1) / n) * n;
}
inline int volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
int vecDim, int comps, int batch) {
int maxNbElems = 1;
for (int i = 0; i < dims.nbDims; ++i) {
// Get effective length of axis.
int d = dims.d[i];
    // If any dimension is 0, it is an empty tensor.
if (d == 0) {
return 0;
}
if (i == vecDim) {
d = samplesCommon::divUp(d, comps);
}
maxNbElems = std::max(maxNbElems, d * strides.d[i]);
}
return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
if (vecDim != -1) {
dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
}
return volume(dims) * std::max(batch, 1);
}
inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
    sample::gLogWarning
        << "Vector too long, only the first 8 elements are used as dimensions."
        << std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
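// Illustrative sketch (not part of the original utilities): toDims() converts
// a shape vector to nvinfer1::Dims, truncating anything beyond MAX_DIMS; the
// helper name and the NCHW shape are assumptions.
inline nvinfer1::Dims exampleNCHWDims() {
  // Produces Dims with nbDims == 4 and d == {1, 3, 224, 224}.
  return toDims({1, 3, 224, 224});
}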
template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
if (std::is_integral<T>::value) {
std::uniform_int_distribution<int> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
} else {
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
}
// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
H* typedBuffer = static_cast<H*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<H>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <>
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
__half max) {
fillBufferHalf(buffer, volume, min, max);
}
template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator,
std::ostream& os, const Dims& dims, const Dims& strides,
int32_t vectorDim, int32_t spv) {
const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1,
std::multiplies<int64_t>());
const T* typedBuffer = static_cast<const T*>(buffer);
std::string sep;
for (int64_t v = 0; v < volume; ++v) {
int64_t curV = v;
int32_t dataOffset = 0;
for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
int32_t dimVal = curV % dims.d[dimIndex];
if (dimIndex == vectorDim) {
dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
} else {
dataOffset +=
dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
}
curV /= dims.d[dimIndex];
ASSERT(curV >= 0);
}
os << sep << typedBuffer[dataOffset];
sep = separator;
}
}
inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
ASSERT(dst);
std::ifstream file(fileName, std::ios::in | std::ios::binary);
if (file.is_open()) {
file.read(dst, size);
file.close();
} else {
std::stringstream msg;
msg << "Cannot open file " << fileName << "!";
throw std::invalid_argument(msg.str());
}
}
struct Binding {
bool isInput{false};
std::unique_ptr<IMirroredBuffer> buffer;
int64_t volume{0};
nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
void fill(const std::string& fileName) {
loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
buffer->getSize());
}
void fill() {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
break;
}
case nvinfer1::DataType::kINT32: {
fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kINT8: {
fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kFLOAT: {
fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
case nvinfer1::DataType::kHALF: {
fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
}
}
void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
int32_t spv, const std::string separator = " ") const {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT32: {
dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT8: {
dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kFLOAT: {
dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kHALF: {
dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
}
}
};
class Bindings {
public:
Bindings() = delete;
explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
nvinfer1::DataType dataType,
const std::string& fileName = "") {
while (mBindings.size() <= static_cast<size_t>(b)) {
mBindings.emplace_back();
mDevicePointers.emplace_back();
}
mNames[name] = b;
if (mBindings[b].buffer == nullptr) {
if (mUseManaged) {
mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
} else {
mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
}
}
mBindings[b].isInput = isInput;
// Some memory allocators return nullptr when allocating zero bytes, but
// TensorRT requires a non-null ptr
// even for empty tensors, so allocate a dummy byte.
if (volume == 0) {
mBindings[b].buffer->allocate(1);
} else {
mBindings[b].buffer->allocate(
static_cast<size_t>(volume) *
static_cast<size_t>(dataTypeSize(dataType)));
}
mBindings[b].volume = volume;
mBindings[b].dataType = dataType;
mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
if (isInput) {
if (fileName.empty()) {
fill(b);
} else {
fill(b, fileName);
}
}
}
void** getDeviceBuffers() { return mDevicePointers.data(); }
void transferInputToDevice(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (mBindings[b.second].isInput) {
mBindings[b.second].buffer->hostToDevice(stream);
}
}
}
void transferOutputToHost(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (!mBindings[b.second].isInput) {
mBindings[b.second].buffer->deviceToHost(stream);
}
}
}
void fill(int binding, const std::string& fileName) {
mBindings[binding].fill(fileName);
}
void fill(int binding) { mBindings[binding].fill(); }
void dumpBindingDimensions(int binding,
const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
const auto dims = context.getBindingDimensions(binding);
// Do not add a newline terminator, because the caller may be outputting a
// JSON string.
os << dims;
}
void dumpBindingValues(const nvinfer1::IExecutionContext& context,
int binding, std::ostream& os,
const std::string& separator = " ",
int32_t batch = 1) const {
Dims dims = context.getBindingDimensions(binding);
Dims strides = context.getStrides(binding);
int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
const int32_t spv =
context.getEngine().getBindingComponentsPerElement(binding);
if (context.getEngine().hasImplicitBatchDimension()) {
auto insertN = [](Dims& d, int32_t bs) {
const int32_t nbDims = d.nbDims;
ASSERT(nbDims < Dims::MAX_DIMS);
std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
d.d[0] = bs;
d.nbDims = nbDims + 1;
};
int32_t batchStride = 0;
for (int32_t i = 0; i < strides.nbDims; ++i) {
if (strides.d[i] * dims.d[i] > batchStride) {
batchStride = strides.d[i] * dims.d[i];
}
}
insertN(dims, batch);
insertN(strides, batchStride);
vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
}
mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
}
void dumpInputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isInput = [](const Binding& b) { return b.isInput; };
dumpBindings(context, isInput, os);
}
void dumpOutputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
dumpBindings(context, isOutput, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto all = [](const Binding& b) { return true; };
dumpBindings(context, all, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
bool (*predicate)(const Binding& b),
std::ostream& os) const {
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
os << n.first << ": (";
dumpBindingDimensions(binding, context, os);
os << ")" << std::endl;
dumpBindingValues(context, binding, os);
os << std::endl;
}
}
}
std::unordered_map<std::string, int> getInputBindings() const {
auto isInput = [](const Binding& b) { return b.isInput; };
return getBindings(isInput);
}
std::unordered_map<std::string, int> getOutputBindings() const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
return getBindings(isOutput);
}
std::unordered_map<std::string, int> getBindings() const {
auto all = [](const Binding& b) { return true; };
return getBindings(all);
}
std::unordered_map<std::string, int>
getBindings(bool (*predicate)(const Binding& b)) const {
std::unordered_map<std::string, int> bindings;
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
bindings.insert(n);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> mNames;
std::vector<Binding> mBindings;
std::vector<void*> mDevicePointers;
bool mUseManaged{false};
};
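// Illustrative sketch (not part of the original utilities): a minimal flow for
// one float input binding using the Bindings helper above; the function and
// parameter names are assumptions.
inline void fillOneInputBinding(Bindings& bindings, TrtCudaStream& stream,
                                int bindingIndex, const std::string& name,
                                int64_t elementCount) {
  // With no file name supplied, the host buffer is filled with random values.
  bindings.addBinding(bindingIndex, name, /*isInput=*/true, elementCount,
                      nvinfer1::DataType::kFLOAT);
  // Asynchronously copy the host side of every input binding to the device.
  bindings.transferInputToDevice(stream);
}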
template <typename T> struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
size_t nbBindings, bool isInput = true) {
bool broadcast = formats.size() == 1;
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
if (!formats.empty() && !validFormatsCount) {
if (isInput) {
throw std::invalid_argument(
"The number of inputIOFormats must match network's inputs or be one "
"for broadcasting.");
} else {
throw std::invalid_argument(
"The number of outputIOFormats must match network's outputs or be "
"one for broadcasting.");
}
}
return broadcast;
}
inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
if (!iFile) {
sample::gLogWarning << "Could not read timing cache from: " << inFileName
<< ". A new timing cache will be generated and written."
<< std::endl;
return std::vector<char>();
}
iFile.seekg(0, std::ifstream::end);
size_t fsize = iFile.tellg();
iFile.seekg(0, std::ifstream::beg);
std::vector<char> content(fsize);
iFile.read(content.data(), fsize);
iFile.close();
sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
<< inFileName << std::endl;
return content;
}
inline void saveTimingCacheFile(const std::string outFileName,
const IHostMemory* blob) {
std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
if (!oFile) {
sample::gLogWarning << "Could not write timing cache to: " << outFileName
<< std::endl;
return;
}
oFile.write((char*)blob->data(), blob->size());
oFile.close();
sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
<< outFileName << std::endl;
}
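// Illustrative sketch (not part of the original utilities): a typical pairing
// of the two helpers above with TensorRT's builder-config timing-cache API;
// object lifetime management and error handling are elided, and the helper
// name is an assumption.
inline void applyTimingCache(nvinfer1::IBuilderConfig& config,
                             const std::string& cacheFile) {
  std::vector<char> blob = loadTimingCacheFile(cacheFile);
  // An empty blob simply starts a fresh timing cache.
  nvinfer1::ITimingCache* cache =
      config.createTimingCache(blob.data(), blob.size());
  config.setTimingCache(*cache, /*ignoreMismatch=*/false);
  // ... build the engine here, then serialize config.getTimingCache() and pass
  //     the resulting IHostMemory blob to saveTimingCacheFile(cacheFile, ...).
}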
inline int32_t getCudaDriverVersion() {
int32_t version{-1};
cudaCheck(cudaDriverGetVersion(&version));
return version;
}
inline int32_t getCudaRuntimeVersion() {
int32_t version{-1};
cudaCheck(cudaRuntimeGetVersion(&version));
return version;
}
} // namespace sample
#endif // TRT_SAMPLE_UTILS_H

View File

@@ -1,568 +0,0 @@
/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
#ifdef REPLACE_GETOPT
int opterr = 1; /* if error message should be printed */
int optind = 1; /* index into parent argv vector */
int optopt = '?'; /* character checked for validity */
#undef optreset /* see getopt.h */
#define optreset __mingw_optreset
int optreset; /* reset getopt */
char* optarg; /* argument associated with option */
#endif
#define PRINT_ERROR ((opterr) && (*options != ':'))
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
/* return values */
#define BADCH (int) '?'
#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
#define INORDER (int) 1
#ifndef __CYGWIN__
#define __progname __argv[0]
#else
extern char __declspec(dllimport) * __progname;
#endif
#ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char* const*, const char*, const struct option*, int*, int);
static int parse_long_options(char* const*, const char*, const struct option*, int*, int);
static int gcd(int, int);
static void permute_args(int, int, int, char* const*);
static char* place = EMSG; /* option letter processing */
/* XXX: set optreset to 1 rather than these two */
static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
static const char recargchar[] = "option requires an argument -- %c";
static const char recargstring[] = "option requires an argument -- %s";
static const char ambig[] = "ambiguous option -- %.*s";
static const char noarg[] = "option doesn't take an argument -- %.*s";
static const char illoptchar[] = "unknown option -- %c";
static const char illoptstring[] = "unknown option -- %s";
static void _vwarnx(const char* fmt, va_list ap)
{
(void) fprintf(stderr, "%s: ", __progname);
if (fmt != NULL)
(void) vfprintf(stderr, fmt, ap);
(void) fprintf(stderr, "\n");
}
static void warnx(const char* fmt, ...)
{
va_list ap;
va_start(ap, fmt);
_vwarnx(fmt, ap);
va_end(ap);
}
/*
* Compute the greatest common divisor of a and b.
*/
static int gcd(int a, int b)
{
int c;
c = a % b;
while (c != 0)
{
a = b;
b = c;
c = a % b;
}
return (b);
}
/*
* Exchange the block from nonopt_start to nonopt_end with the block
* from nonopt_end to opt_end (keeping the same order of arguments
* in each block).
*/
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
{
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
char* swap;
/*
* compute lengths of blocks and number and size of cycles
*/
nnonopts = panonopt_end - panonopt_start;
nopts = opt_end - panonopt_end;
ncycle = gcd(nnonopts, nopts);
cyclelen = (opt_end - panonopt_start) / ncycle;
for (i = 0; i < ncycle; i++)
{
cstart = panonopt_end + i;
pos = cstart;
for (j = 0; j < cyclelen; j++)
{
if (pos >= panonopt_end)
pos -= nnonopts;
else
pos += nopts;
swap = nargv[pos];
/* LINTED const cast */
((char**) nargv)[pos] = nargv[cstart];
/* LINTED const cast */
((char**) nargv)[cstart] = swap;
}
}
}
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*/
static int parse_long_options(
char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too)
{
char *current_argv, *has_equal;
size_t current_argv_len;
int i, ambiguous, match;
#define IDENTICAL_INTERPRETATION(_x, _y) \
(long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \
&& long_options[(_x)].val == long_options[(_y)].val)
current_argv = place;
match = -1;
ambiguous = 0;
optind++;
if ((has_equal = strchr(current_argv, '=')) != NULL)
{
/* argument found (--option=arg) */
current_argv_len = has_equal - current_argv;
has_equal++;
}
else
current_argv_len = strlen(current_argv);
for (i = 0; long_options[i].name; i++)
{
/* find matching long option */
if (strncmp(current_argv, long_options[i].name, current_argv_len))
continue;
if (strlen(long_options[i].name) == current_argv_len)
{
/* exact match */
match = i;
ambiguous = 0;
break;
}
/*
* If this is a known short option, don't allow
* a partial match of a single character.
*/
if (short_too && current_argv_len == 1)
continue;
if (match == -1) /* partial match */
match = i;
else if (!IDENTICAL_INTERPRETATION(i, match))
ambiguous = 1;
}
if (ambiguous)
{
/* ambiguous abbreviation */
if (PRINT_ERROR)
warnx(ambig, (int) current_argv_len, current_argv);
optopt = 0;
return (BADCH);
}
if (match != -1)
{ /* option found */
if (long_options[match].has_arg == no_argument && has_equal)
{
if (PRINT_ERROR)
warnx(noarg, (int) current_argv_len, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
return (BADARG);
}
if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
{
if (has_equal)
optarg = has_equal;
else if (long_options[match].has_arg == required_argument)
{
/*
* optional argument doesn't use next nargv
*/
optarg = nargv[optind++];
}
}
if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
{
/*
* Missing argument; leading ':' indicates no error
* should be generated.
*/
if (PRINT_ERROR)
warnx(recargstring, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
--optind;
return (BADARG);
}
}
else
{ /* unknown option */
if (short_too)
{
--optind;
return (-1);
}
if (PRINT_ERROR)
warnx(illoptstring, current_argv);
optopt = 0;
return (BADCH);
}
if (idx)
*idx = match;
if (long_options[match].flag)
{
*long_options[match].flag = long_options[match].val;
return (0);
}
else
return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
/*
* getopt_internal --
* Parse argc/argv argument vector. Called by user level routines.
*/
static int getopt_internal(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags)
{
const char* oli; /* option letter list index */
int optchar, short_too;
static int posixly_correct = -1;
if (options == NULL)
return (-1);
/*
* XXX Some GNU programs (like cvs) set optind to 0 instead of
* XXX using optreset. Work around this braindamage.
*/
if (optind == 0)
optind = optreset = 1;
/*
* Disable GNU extensions if POSIXLY_CORRECT is set or options
* string begins with a '+'.
*
* CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
* optreset != 0 for GNU compatibility.
*/
if (posixly_correct == -1 || optreset != 0)
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
if (*options == '-')
flags |= FLAG_ALLARGS;
else if (posixly_correct || *options == '+')
flags &= ~FLAG_PERMUTE;
if (*options == '+' || *options == '-')
options++;
optarg = NULL;
if (optreset)
nonopt_start = nonopt_end = -1;
start:
if (optreset || !*place)
{ /* update scanning pointer */
optreset = 0;
if (optind >= nargc)
{ /* end of argument vector */
place = EMSG;
if (nonopt_end != -1)
{
/* do permutation, if we have to */
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
else if (nonopt_start != -1)
{
/*
* If we skipped non-options, set optind
* to the first of them.
*/
optind = nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
{
place = EMSG; /* found non-option */
if (flags & FLAG_ALLARGS)
{
/*
* GNU extension:
* return non-option as argument to option 1
*/
optarg = nargv[optind++];
return (INORDER);
}
if (!(flags & FLAG_PERMUTE))
{
/*
* If no permutation wanted, stop parsing
* at first non-option.
*/
return (-1);
}
/* do permutation */
if (nonopt_start == -1)
nonopt_start = optind;
else if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
nonopt_start = optind - (nonopt_end - nonopt_start);
nonopt_end = -1;
}
optind++;
/* process next argument */
goto start;
}
if (nonopt_start != -1 && nonopt_end == -1)
nonopt_end = optind;
/*
* If we have "-" do nothing, if "--" we are done.
*/
if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
{
optind++;
place = EMSG;
/*
* We found an option (--), so if we skipped
* non-options, we have to permute.
*/
if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
}
/*
* Check long options if:
* 1) we were passed some
* 2) the arg is not just "-"
 *  3) either the arg starts with -- or we are getopt_long_only()
*/
if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
{
short_too = 0;
if (*place == '-')
place++; /* --foo long option */
else if (*place != ':' && strchr(options, *place) != NULL)
short_too = 1; /* could be short option too */
optchar = parse_long_options(nargv, options, long_options, idx, short_too);
if (optchar != -1)
{
place = EMSG;
return (optchar);
}
}
if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
|| (oli = strchr(options, optchar)) == NULL)
{
/*
* If the user specified "-" and '-' isn't listed in
* options, return -1 (non-option) as per POSIX.
* Otherwise, it is an unknown option character (or ':').
*/
if (optchar == (int) '-' && *place == '\0')
return (-1);
if (!*place)
++optind;
if (PRINT_ERROR)
warnx(illoptchar, optchar);
optopt = optchar;
return (BADCH);
}
if (long_options != NULL && optchar == 'W' && oli[1] == ';')
{
/* -W long-option */
if (*place) /* no space */
/* NOTHING */;
else if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else /* white space */
place = nargv[optind];
optchar = parse_long_options(nargv, options, long_options, idx, 0);
place = EMSG;
return (optchar);
}
if (*++oli != ':')
{ /* doesn't take argument */
if (!*place)
++optind;
}
else
{ /* takes (optional) argument */
optarg = NULL;
if (*place) /* no white space */
optarg = place;
else if (oli[1] != ':')
{ /* arg not optional */
if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else
optarg = nargv[optind];
}
place = EMSG;
++optind;
}
/* dump back option letter */
return (optchar);
}
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int getopt(int nargc, char* const* nargv, const char* options)
{
/*
* We don't pass FLAG_PERMUTE to getopt_internal() since
* the BSD getopt(3) (unlike GNU) has never done this.
*
* Furthermore, since many privileged programs call getopt()
* before dropping privileges it makes sense to keep things
* as simple (and bug-free) as possible.
*/
return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */
/*
* getopt_long --
* Parse argc/argv argument vector.
*/
int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
}
/*
* getopt_long_only --
* Parse argc/argv argument vector.
*/
int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
}
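/*
 * Illustrative usage sketch (not part of the original port): a minimal option
 * table consumed by getopt_long(); the function, option names and letters are
 * assumptions. Guarded out so it is never compiled by default.
 */
#ifdef GETOPT_USAGE_EXAMPLE
static int example_parse(int nargc, char* const* nargv)
{
    static const struct option longopts[] = {{"model", required_argument, NULL, 'm'},
                                             {"verbose", no_argument, NULL, 'v'},
                                             {NULL, 0, NULL, 0}};
    int ch, idx = 0;
    while ((ch = getopt_long(nargc, nargv, "m:v", longopts, &idx)) != -1)
    {
        switch (ch)
        {
        case 'm': /* optarg points at the model path */
            break;
        case 'v': /* enable verbose output */
            break;
        default: /* getopt_long already printed a diagnostic */
            return -1;
        }
    }
    return optind; /* index of the first non-option argument */
}
#endif /* GETOPT_USAGE_EXAMPLE */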

View File

@@ -1,124 +0,0 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file has no copyright assigned and is placed in the Public Domain.
* This file is a part of the w64 mingw-runtime package.
*
* The w64 mingw-runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#define __GETOPT_H__
/* All the headers include this file. */
#include <crtdefs.h>
#if defined(WINGETOPT_SHARED_LIB)
#if defined(BUILDING_WINGETOPT_DLL)
#define WINGETOPT_API __declspec(dllexport)
#else
#define WINGETOPT_API __declspec(dllimport)
#endif
#else
#define WINGETOPT_API
#endif
#ifdef __cplusplus
extern "C"
{
#endif
WINGETOPT_API extern int optind; /* index of first non-option in argv */
WINGETOPT_API extern int optopt; /* single option character, as parsed */
WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
/* (user may set to zero, to suppress) */
WINGETOPT_API extern char* optarg; /* pointer to argument of current option */
extern int getopt(int nargc, char* const* nargv, const char* options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
#define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C"
{
#endif
struct option /* specification for a long form option... */
{
const char* name; /* option name, without leading hyphens */
int has_arg; /* does it take an argument? */
int* flag; /* where to save its status, or NULL */
int val; /* its associated status value */
};
enum /* permitted values for its `has_arg' field... */
{
no_argument = 0, /* option never takes an argument */
required_argument, /* option always requires an argument */
optional_argument /* option may take an argument */
};
extern int getopt_long(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
extern int getopt_long_only(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
#define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
}
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */

View File

@@ -1,528 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
namespace fastdeploy {
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
if (dtype == nvinfer1::DataType::kFLOAT) {
return sizeof(float);
} else if (dtype == nvinfer1::DataType::kHALF) {
return sizeof(float) / 2;
} else if (dtype == nvinfer1::DataType::kINT8) {
return sizeof(int8_t);
} else if (dtype == nvinfer1::DataType::kINT32) {
return sizeof(int32_t);
}
// kBOOL
return sizeof(bool);
}
FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
if (dtype == nvinfer1::DataType::kFLOAT) {
return FDDataType::FP32;
} else if (dtype == nvinfer1::DataType::kHALF) {
return FDDataType::FP16;
} else if (dtype == nvinfer1::DataType::kINT8) {
return FDDataType::INT8;
} else if (dtype == nvinfer1::DataType::kINT32) {
return FDDataType::INT32;
}
// kBOOL
return FDDataType::BOOL;
}
std::vector<int> toVec(const nvinfer1::Dims& dim) {
std::vector<int> out(dim.d, dim.d + dim.nbDims);
return out;
}
bool CheckDynamicShapeConfig(const paddle2onnx::OnnxReader& reader,
const TrtBackendOption& option) {
// paddle2onnx::ModelTensorInfo inputs[reader.NumInputs()];
// std::string input_shapes[reader.NumInputs()];
std::vector<paddle2onnx::ModelTensorInfo> inputs(reader.NumInputs());
std::vector<std::string> input_shapes(reader.NumInputs());
for (int i = 0; i < reader.NumInputs(); ++i) {
reader.GetInputInfo(i, &inputs[i]);
    // Change 0 to -1: when an input dim is a symbolic string, ONNX reports it as zero.
for (int j = 0; j < inputs[i].rank; ++j) {
if (inputs[i].shape[j] <= 0) {
inputs[i].shape[j] = -1;
}
}
input_shapes[i] = "";
for (int j = 0; j < inputs[i].rank; ++j) {
if (j != inputs[i].rank - 1) {
input_shapes[i] += (std::to_string(inputs[i].shape[j]) + ", ");
} else {
input_shapes[i] += std::to_string(inputs[i].shape[j]);
}
}
}
bool all_check_passed = true;
for (int i = 0; i < reader.NumInputs(); ++i) {
bool contain_unknown_dim = false;
for (int j = 0; j < inputs[i].rank; ++j) {
if (inputs[i].shape[j] < 0) {
contain_unknown_dim = true;
}
}
std::string name(inputs[i].name, strlen(inputs[i].name));
FDINFO << "The loaded model's input tensor:" << name
<< " has shape [" + input_shapes[i] << "]." << std::endl;
if (contain_unknown_dim) {
auto iter1 = option.min_shape.find(name);
auto iter2 = option.max_shape.find(name);
auto iter3 = option.opt_shape.find(name);
if (iter1 == option.min_shape.end() || iter2 == option.max_shape.end() ||
iter3 == option.opt_shape.end()) {
FDERROR << "The loaded model's input tensor:" << name
<< " has dynamic shape [" + input_shapes[i] +
"], but didn't configure it's shape for tensorrt with "
"SetTrtInputShape correctly."
<< std::endl;
all_check_passed = false;
}
}
}
return all_check_passed;
}
bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
const TrtBackendOption& option) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
cudaSetDevice(option.gpu_id);
std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "Failed to open TensorRT Engine file " << trt_engine_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
std::string engine_buffer;
engine_buffer.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(engine_buffer.at(0)), engine_buffer.size());
fin.close();
SampleUniquePtr<IRuntime> runtime{
createInferRuntime(sample::gLogger.getTRTLogger())};
if (!runtime) {
FDERROR << "Failed to call createInferRuntime()." << std::endl;
return false;
}
engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
runtime->deserializeCudaEngine(engine_buffer.data(),
engine_buffer.size()),
samplesCommon::InferDeleter());
if (!engine_) {
FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
return false;
}
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
engine_->createExecutionContext());
FDASSERT(cudaStreamCreate(&stream_) == 0,
"[ERROR] Error occurs while calling cudaStreamCreate().");
GetInputOutputInfo();
initialized_ = true;
return true;
}
bool TrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const TrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
#ifdef ENABLE_PADDLE_FRONTEND
std::vector<paddle2onnx::CustomOp> custom_ops;
for (auto& item : option.custom_op_info_) {
paddle2onnx::CustomOp op;
std::strcpy(op.op_name, item.first.c_str());
std::strcpy(op.export_op_name, item.second.c_str());
custom_ops.emplace_back(op);
}
char* model_content_ptr;
int model_content_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, custom_ops.data(),
custom_ops.size())) {
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
<< std::endl;
return false;
}
if (option.remove_multiclass_nms_) {
char* new_model = nullptr;
int new_model_size = 0;
if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
&new_model, &new_model_size)) {
FDERROR << "Try to remove MultiClassNMS failed." << std::endl;
return false;
}
delete[] model_content_ptr;
std::string onnx_model_proto(new_model, new_model + new_model_size);
delete[] new_model;
return InitFromOnnx(onnx_model_proto, option, true);
}
std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
delete[] model_content_ptr;
model_content_ptr = nullptr;
return InitFromOnnx(onnx_model_proto, option, true);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
<< std::endl;
return false;
#endif
}
bool TrtBackend::InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option,
bool from_memory_buffer) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
cudaSetDevice(option.gpu_id);
std::string onnx_content = "";
if (!from_memory_buffer) {
std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
onnx_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(onnx_content.at(0)), onnx_content.size());
fin.close();
} else {
onnx_content = model_file;
}
  // Record the original output order, because the converted TensorRT network
  // may produce outputs in a different order.
outputs_order_.clear();
auto onnx_reader =
paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
for (int i = 0; i < onnx_reader.NumOutputs(); ++i) {
std::string name(
onnx_reader.output_names[i],
onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i]));
outputs_order_[name] = i;
}
if (!CheckDynamicShapeConfig(onnx_reader, option)) {
FDERROR << "TrtBackend::CheckDynamicShapeConfig failed." << std::endl;
return false;
}
if (option.serialize_file != "") {
std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
if (fin) {
FDINFO << "Detect serialized TensorRT Engine file in "
<< option.serialize_file << ", will load it directly."
<< std::endl;
fin.close();
return InitFromTrt(option.serialize_file);
}
}
if (!CreateTrtEngine(onnx_content, option)) {
return false;
}
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
engine_->createExecutionContext());
FDASSERT(cudaStreamCreate(&stream_) == 0,
"[ERROR] Error occurs while calling cudaStreamCreate().");
GetInputOutputInfo();
initialized_ = true;
return true;
}
bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
AllocateBufferInDynamicShape(inputs, outputs);
std::vector<void*> input_binds(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
if (inputs[i].dtype == FDDataType::INT64) {
int64_t* data = static_cast<int64_t*>(inputs[i].Data());
std::vector<int32_t> casted_data(data, data + inputs[i].Numel());
FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
static_cast<void*>(casted_data.data()),
inputs[i].Nbytes() / 2, cudaMemcpyHostToDevice,
stream_) == 0,
"[ERROR] Error occurs while copy memory from CPU to GPU.");
} else {
FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
inputs[i].Data(), inputs[i].Nbytes(),
cudaMemcpyHostToDevice, stream_) == 0,
"[ERROR] Error occurs while copy memory from CPU to GPU.");
}
}
if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
FDERROR << "Failed to Infer with TensorRT." << std::endl;
return false;
}
for (size_t i = 0; i < outputs->size(); ++i) {
FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(),
outputs_buffer_[(*outputs)[i].name].data(),
(*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
stream_) == 0,
"[ERROR] Error occurs while copy memory from GPU to CPU.");
}
return true;
}
void TrtBackend::GetInputOutputInfo() {
inputs_desc_.clear();
outputs_desc_.clear();
auto num_binds = engine_->getNbBindings();
for (auto i = 0; i < num_binds; ++i) {
std::string name = std::string(engine_->getBindingName(i));
auto shape = toVec(engine_->getBindingDimensions(i));
auto dtype = engine_->getBindingDataType(i);
if (engine_->bindingIsInput(i)) {
inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
inputs_buffer_[name] = DeviceBuffer(dtype);
} else {
outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
outputs_buffer_[name] = DeviceBuffer(dtype);
}
}
bindings_.resize(num_binds);
}
void TrtBackend::AllocateBufferInDynamicShape(
const std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs) {
for (const auto& item : inputs) {
auto idx = engine_->getBindingIndex(item.name.c_str());
std::vector<int> shape(item.shape.begin(), item.shape.end());
auto dims = sample::toDims(shape);
context_->setBindingDimensions(idx, dims);
if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
inputs_buffer_[item.name].resize(dims);
bindings_[idx] = inputs_buffer_[item.name].data();
}
}
if (outputs->size() != outputs_desc_.size()) {
outputs->resize(outputs_desc_.size());
}
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
auto output_dims = context_->getBindingDimensions(idx);
// find the original index of output
auto iter = outputs_order_.find(outputs_desc_[i].name);
    FDASSERT(iter != outputs_order_.end(),
             "Cannot find output: " + outputs_desc_[i].name +
                 " of the TensorRT network in the original model.");
auto ori_idx = iter->second;
(*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
(*outputs)[ori_idx].shape.assign(output_dims.d,
output_dims.d + output_dims.nbDims);
(*outputs)[ori_idx].name = outputs_desc_[i].name;
(*outputs)[ori_idx].data.resize(volume(output_dims) *
TrtDataTypeSize(outputs_desc_[i].dtype));
if ((*outputs)[ori_idx].Nbytes() >
outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
}
}
}
bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
const TrtBackendOption& option) {
const auto explicitBatch =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
builder_ = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder_) {
FDERROR << "Failed to call createInferBuilder()." << std::endl;
return false;
}
network_ = SampleUniquePtr<nvinfer1::INetworkDefinition>(
builder_->createNetworkV2(explicitBatch));
if (!network_) {
FDERROR << "Failed to call createNetworkV2()." << std::endl;
return false;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(
builder_->createBuilderConfig());
if (!config) {
FDERROR << "Failed to call createBuilderConfig()." << std::endl;
return false;
}
if (option.enable_fp16) {
if (!builder_->platformHasFastFp16()) {
FDWARNING << "Detected FP16 is not supported in the current GPU, "
"will use FP32 instead."
<< std::endl;
} else {
config->setFlag(nvinfer1::BuilderFlag::kFP16);
}
}
parser_ = SampleUniquePtr<nvonnxparser::IParser>(
nvonnxparser::createParser(*network_, sample::gLogger.getTRTLogger()));
if (!parser_) {
FDERROR << "Failed to call createParser()." << std::endl;
return false;
}
if (!parser_->parse(onnx_model.data(), onnx_model.size())) {
FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
return false;
}
FDINFO << "Start to building TensorRT Engine..." << std::endl;
bool fp16 = builder_->platformHasFastFp16();
builder_->setMaxBatchSize(option.max_batch_size);
config->setMaxWorkspaceSize(option.max_workspace_size);
if (option.max_shape.size() > 0) {
auto profile = builder_->createOptimizationProfile();
FDASSERT(option.max_shape.size() == option.min_shape.size() &&
option.min_shape.size() == option.opt_shape.size(),
"[TrtBackend] Size of max_shape/opt_shape/min_shape in "
"TrtBackendOption should keep same.");
for (const auto& item : option.min_shape) {
// set min shape
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMIN,
sample::toDims(item.second)),
"[TrtBackend] Failed to set min_shape for input: " + item.first +
" in TrtBackend.");
// set optimization shape
auto iter = option.opt_shape.find(item.first);
FDASSERT(iter != option.opt_shape.end(),
"[TrtBackend] Cannot find input name: " + item.first +
" in TrtBackendOption::opt_shape.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kOPT,
sample::toDims(iter->second)),
"[TrtBackend] Failed to set opt_shape for input: " + item.first +
" in TrtBackend.");
// set max shape
iter = option.max_shape.find(item.first);
FDASSERT(iter != option.max_shape.end(),
"[TrtBackend] Cannot find input name: " + item.first +
" in TrtBackendOption::max_shape.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMAX,
sample::toDims(iter->second)),
"[TrtBackend] Failed to set max_shape for input: " + item.first +
" in TrtBackend.");
}
config->addOptimizationProfile(profile);
}
SampleUniquePtr<IHostMemory> plan{
builder_->buildSerializedNetwork(*network_, *config)};
if (!plan) {
FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
return false;
}
SampleUniquePtr<IRuntime> runtime{
createInferRuntime(sample::gLogger.getTRTLogger())};
if (!runtime) {
FDERROR << "Failed to call createInferRuntime()." << std::endl;
return false;
}
engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
runtime->deserializeCudaEngine(plan->data(), plan->size()),
samplesCommon::InferDeleter());
if (!engine_) {
FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
return false;
}
FDINFO << "TensorRT Engine is built succussfully." << std::endl;
if (option.serialize_file != "") {
FDINFO << "Serialize TensorRTEngine to local file " << option.serialize_file
<< "." << std::endl;
std::ofstream engine_file(option.serialize_file.c_str());
if (!engine_file) {
FDERROR << "Failed to open " << option.serialize_file << " to write."
<< std::endl;
return false;
}
engine_file.write(static_cast<char*>(plan->data()), plan->size());
engine_file.close();
FDINFO << "TensorRTEngine is serialized to local file "
<< option.serialize_file
<< ", we can load this model from the seralized engine "
"directly next time."
<< std::endl;
}
return true;
}
TensorInfo TrtBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: " + std::to_string(index) +
               " should be less than the number of inputs: " +
               std::to_string(NumInputs()) + ".");
TensorInfo info;
info.name = inputs_desc_[index].name;
info.shape.assign(inputs_desc_[index].shape.begin(),
inputs_desc_[index].shape.end());
info.dtype = GetFDDataType(inputs_desc_[index].dtype);
return info;
}
TensorInfo TrtBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: " + std::to_string(index) +
               " should be less than the number of outputs: " +
               std::to_string(NumOutputs()) + ".");
TensorInfo info;
info.name = outputs_desc_[index].name;
info.shape.assign(outputs_desc_[index].shape.begin(),
outputs_desc_[index].shape.end());
info.dtype = GetFDDataType(outputs_desc_[index].dtype);
return info;
}
} // namespace fastdeploy

View File

@@ -1,113 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/tensorrt/common/argsParser.h"
#include "fastdeploy/backends/tensorrt/common/buffers.h"
#include "fastdeploy/backends/tensorrt/common/common.h"
#include "fastdeploy/backends/tensorrt/common/logger.h"
#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
#include <cuda_runtime_api.h>
#include "NvInfer.h"
namespace fastdeploy {
using namespace samplesCommon;
struct TrtValueInfo {
std::string name;
std::vector<int> shape;
nvinfer1::DataType dtype;
};
struct TrtBackendOption {
int gpu_id = 0;
bool enable_fp16 = false;
bool enable_int8 = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
std::map<std::string, std::vector<int32_t>> max_shape;
std::map<std::string, std::vector<int32_t>> min_shape;
std::map<std::string, std::vector<int32_t>> opt_shape;
std::string serialize_file = "";
// inside parameter, maybe remove next version
bool remove_multiclass_nms_ = false;
std::map<std::string, std::string> custom_op_info_;
};
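// Illustrative sketch (not part of the original header): every dynamic input
// needs matching entries in min_shape/opt_shape/max_shape before the engine is
// built; the helper name, the input name "x" and the shapes are assumptions.
inline TrtBackendOption MakeDynamicShapeOption() {
  TrtBackendOption option;
  option.min_shape["x"] = {1, 3, 224, 224};  // smallest shape to support
  option.opt_shape["x"] = {4, 3, 224, 224};  // shape TensorRT optimizes for
  option.max_shape["x"] = {8, 3, 224, 224};  // largest shape to support
  option.serialize_file = "model.trt";       // cache the built engine on disk
  return option;
}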
std::vector<int> toVec(const nvinfer1::Dims& dim);
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
class TrtBackend : public BaseBackend {
public:
TrtBackend() : engine_(nullptr), context_(nullptr) {}
virtual ~TrtBackend() = default;
void BuildOption(const TrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const TrtBackendOption& option = TrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option = TrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromTrt(const std::string& trt_engine_file,
const TrtBackendOption& option = TrtBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
private:
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
std::shared_ptr<nvinfer1::IExecutionContext> context_;
SampleUniquePtr<nvonnxparser::IParser> parser_;
SampleUniquePtr<nvinfer1::IBuilder> builder_;
SampleUniquePtr<nvinfer1::INetworkDefinition> network_;
cudaStream_t stream_{};
std::vector<void*> bindings_;
std::vector<TrtValueInfo> inputs_desc_;
std::vector<TrtValueInfo> outputs_desc_;
std::map<std::string, DeviceBuffer> inputs_buffer_;
std::map<std::string, DeviceBuffer> outputs_buffer_;
  // When the model has more than one output, the output order of the TensorRT
  // network may differ from that of the original ONNX model, so this map
  // records the original output order to help recover the right order.
std::map<std::string, int> outputs_order_;
void GetInputOutputInfo();
void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs);
bool CreateTrtEngine(const std::string& onnx_model,
const TrtBackendOption& option);
};
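// Illustrative sketch (not part of the original header): the typical call
// sequence for this backend with an ONNX model that has static input shapes;
// the helper name is an assumption and real input data is omitted.
inline bool RunTrtBackendOnce(const std::string& onnx_file,
                              const TrtBackendOption& option) {
  TrtBackend backend;
  if (!backend.InitFromOnnx(onnx_file, option)) {
    return false;
  }
  std::vector<FDTensor> inputs(backend.NumInputs());
  for (int i = 0; i < backend.NumInputs(); ++i) {
    TensorInfo info = backend.GetInputInfo(i);
    std::vector<int64_t> shape(info.shape.begin(), info.shape.end());
    // Allocates a zero-filled CPU buffer; real code would fill it with data.
    inputs[i].Allocate(shape, info.dtype, info.name);
  }
  std::vector<FDTensor> outputs;
  return backend.Infer(inputs, &outputs);
}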
} // namespace fastdeploy

View File

@@ -1,54 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifndef FASTDEPLOY_DEBUG
#cmakedefine FASTDEPLOY_DEBUG
#endif
#ifndef FASTDEPLOY_LIB
#cmakedefine FASTDEPLOY_LIB
#endif
#ifndef ENABLE_PADDLE_FRONTEND
#cmakedefine ENABLE_PADDLE_FRONTEND
#endif
#ifndef ENABLE_ORT_BACKEND
#cmakedefine ENABLE_ORT_BACKEND
#endif
#ifndef ENABLE_PADDLE_BACKEND
#cmakedefine ENABLE_PADDLE_BACKEND
#endif
#ifndef WITH_GPU
#cmakedefine WITH_GPU
#endif
#ifndef ENABLE_TRT_BACKEND
#cmakedefine ENABLE_TRT_BACKEND
#endif
#ifndef ENABLE_VISION
#cmakedefine ENABLE_VISION
#endif
#ifndef ENABLE_OPENCV_CUDA
#cmakedefine ENABLE_OPENCV_CUDA
#endif
#ifndef ENABLE_VISION_VISUALIZE
#cmakedefine ENABLE_VISION_VISUALIZE
#endif

View File

@@ -1,134 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif
namespace fastdeploy {
void* FDTensor::MutableData() {
if (external_data_ptr != nullptr) {
return external_data_ptr;
}
return data.data();
}
void* FDTensor::Data() {
if (external_data_ptr != nullptr) {
if (device == Device::GPU) {
#ifdef WITH_GPU
// need to copy cuda mem to cpu first
temporary_cpu_buffer.resize(Nbytes());
FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
Nbytes(), cudaMemcpyDeviceToHost) == 0,
"[ERROR] Error occurs while copy memory from GPU to CPU");
return temporary_cpu_buffer.data();
#else
      FDASSERT(false,
               "FastDeploy was not compiled with -DWITH_GPU=ON, so this is an "
               "unexpected problem.");
#endif
} else {
return external_data_ptr;
}
}
return data.data();
}
const void* FDTensor::Data() const {
if (external_data_ptr != nullptr) {
return external_data_ptr;
}
return data.data();
}
void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
const FDDataType& data_type, void* data_buffer) {
dtype = data_type;
shape.assign(new_shape.begin(), new_shape.end());
external_data_ptr = data_buffer;
}
void FDTensor::Allocate(const std::vector<int64_t>& new_shape,
const FDDataType& data_type,
const std::string& tensor_name) {
dtype = data_type;
name = tensor_name;
shape.assign(new_shape.begin(), new_shape.end());
int unit = FDDataTypeSize(data_type);
int total_size =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
data.resize(total_size * unit);
}
int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }
int FDTensor::Numel() const {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
template <typename T>
void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
double* min) {
T* ptr = static_cast<T*>(src_ptr);
*mean = 0;
*max = -99999999;
*min = 99999999;
for (int i = 0; i < size; ++i) {
if (*(ptr + i) > *max) {
*max = *(ptr + i);
}
if (*(ptr + i) < *min) {
*min = *(ptr + i);
}
*mean += *(ptr + i);
}
*mean = *mean / size;
}
void FDTensor::PrintInfo(const std::string& prefix) {
double mean = 0;
double max = -99999999;
double min = 99999999;
if (dtype == FDDataType::FP32) {
CalculateStatisInfo<float>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::FP64) {
CalculateStatisInfo<double>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT8) {
CalculateStatisInfo<int8_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::UINT8) {
CalculateStatisInfo<uint8_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT32) {
CalculateStatisInfo<int32_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT64) {
CalculateStatisInfo<int64_t>(Data(), Numel(), &mean, &max, &min);
} else {
    FDASSERT(false,
             "PrintInfo function doesn't support the current data type, this "
             "function may need to be extended.")
}
std::cout << prefix << ": shape=";
for (int i = 0; i < shape.size(); ++i) {
std::cout << shape[i] << " ";
}
std::cout << ", dtype=" << Str(dtype) << ", mean=" << mean << ", max=" << max
<< ", min=" << min << std::endl;
}
FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }
} // namespace fastdeploy

View File

@@ -1,87 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "fastdeploy/core/fd_type.h"
namespace fastdeploy {
struct FASTDEPLOY_DECL FDTensor {
std::vector<int8_t> data;
std::vector<int64_t> shape;
std::string name = "";
FDDataType dtype;
// This is used to skip the memory copy step;
// external_data_ptr will point to user-allocated memory, and
// the user has to maintain that memory (allocate and release it)
void* external_data_ptr = nullptr;
// The internal data is always on CPU
// Sometimes the external data is on the GPU, and we also run
// inference on the GPU, so we can skip the data transfer, which
// may improve efficiency
Device device = Device::CPU;
// if the external data is not on CPU, we use this temporary buffer
// to transfer the data to CPU, since in some cases we need to access
// another device's data
std::vector<int8_t> temporary_cpu_buffer;
// Get data buffer pointer
void* MutableData();
// Use this function to get the tensor data for processing.
// Since the most common scenario is processing data on CPU,
// this function will return a pointer to a CPU memory buffer.
// If the original data is on another device, the data will be
// copied to CPU and stored in `temporary_cpu_buffer`
void* Data();
const void* Data() const;
// Set a user memory buffer for the Tensor. The memory is managed by
// the user itself, but the Tensor will share this memory with the user,
// so take care with the lifetime of the user buffer
void SetExternalData(const std::vector<int64_t>& new_shape,
const FDDataType& data_type, void* data_buffer);
// Initialize the Tensor, including setting its attributes
// and allocating a CPU memory buffer
void Allocate(const std::vector<int64_t>& new_shape,
const FDDataType& data_type,
const std::string& tensor_name = "");
// Total size of tensor memory buffer in bytes
int Nbytes() const;
// Total number of elements in this tensor
int Numel() const;
// Debug function
// Use this function to print shape, dtype, mean, max and min;
// the prefix will also be printed as a tag
void PrintInfo(const std::string& prefix = "TensorInfo: ");
FDTensor() {}
explicit FDTensor(const std::string& tensor_name);
};
} // namespace fastdeploy
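A minimal usage sketch of the FDTensor API declared above; the function name, shapes and values are illustrative assumptions, not part of the library.
// Sketch: allocate an owned CPU buffer, fill it, and print statistics;
// then wrap a user-managed buffer without copying via SetExternalData.
#include <vector>
#include "fastdeploy/core/fd_tensor.h"

void FDTensorSketch() {
  using namespace fastdeploy;
  FDTensor owned("owned_tensor");
  owned.Allocate({2, 3}, FDDataType::FP32);
  float* buf = reinterpret_cast<float*>(owned.MutableData());
  for (int i = 0; i < owned.Numel(); ++i) buf[i] = static_cast<float>(i);
  owned.PrintInfo("owned");  // prints shape, dtype, mean, max, min

  // The caller keeps ownership of `external` and must keep it alive
  // while `shared` is in use.
  std::vector<float> external(6, 1.0f);
  FDTensor shared("shared_tensor");
  shared.SetExternalData({2, 3}, FDDataType::FP32, external.data());
}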

View File

@@ -1,123 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
int FDDataTypeSize(const FDDataType& data_type) {
FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
if (data_type == FDDataType::BOOL) {
return sizeof(bool);
} else if (data_type == FDDataType::INT16) {
return sizeof(int16_t);
} else if (data_type == FDDataType::INT32) {
return sizeof(int32_t);
} else if (data_type == FDDataType::INT64) {
return sizeof(int64_t);
} else if (data_type == FDDataType::FP32) {
return sizeof(float);
} else if (data_type == FDDataType::FP64) {
return sizeof(double);
  } else if (data_type == FDDataType::UINT8) {
    return sizeof(uint8_t);
  } else if (data_type == FDDataType::INT8) {
    return sizeof(int8_t);
  } else {
FDASSERT(false, "Unexpected data type: " + Str(data_type));
}
return -1;
}
std::string Str(const Device& d) {
std::string out;
switch (d) {
case Device::DEFAULT:
out = "Device::DEFAULT";
break;
case Device::CPU:
out = "Device::CPU";
break;
case Device::GPU:
out = "Device::GPU";
break;
default:
out = "Device::UNKOWN";
}
return out;
}
std::string Str(const FDDataType& fdt) {
std::string out;
switch (fdt) {
case FDDataType::BOOL:
out = "FDDataType::BOOL";
break;
case FDDataType::INT16:
out = "FDDataType::INT16";
break;
case FDDataType::INT32:
out = "FDDataType::INT32";
break;
case FDDataType::INT64:
out = "FDDataType::INT64";
break;
case FDDataType::FP32:
out = "FDDataType::FP32";
break;
case FDDataType::FP64:
out = "FDDataType::FP64";
break;
case FDDataType::FP16:
out = "FDDataType::FP16";
break;
case FDDataType::UINT8:
out = "FDDataType::UINT8";
break;
case FDDataType::INT8:
out = "FDDataType::INT8";
break;
default:
out = "FDDataType::UNKNOWN";
}
return out;
}
template <typename PlainType>
const FDDataType TypeToDataType<PlainType>::dtype = UNKNOWN1;
template <>
const FDDataType TypeToDataType<bool>::dtype = BOOL;
template <>
const FDDataType TypeToDataType<int16_t>::dtype = INT16;
template <>
const FDDataType TypeToDataType<int32_t>::dtype = INT32;
template <>
const FDDataType TypeToDataType<int64_t>::dtype = INT64;
template <>
const FDDataType TypeToDataType<float>::dtype = FP32;
template <>
const FDDataType TypeToDataType<double>::dtype = FP64;
template <>
const FDDataType TypeToDataType<uint8_t>::dtype = UINT8;
template <>
const FDDataType TypeToDataType<int8_t>::dtype = INT8;
} // namespace fastdeploy

View File

@@ -1,63 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ostream>
#include <sstream>
#include <string>
#include "fastdeploy/core/config.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
enum FASTDEPLOY_DECL Device { DEFAULT, CPU, GPU };
FASTDEPLOY_DECL std::string Str(const Device& d);
enum FASTDEPLOY_DECL FDDataType {
BOOL,
INT16,
INT32,
INT64,
FP16,
FP32,
FP64,
UNKNOWN1,
UNKNOWN2,
UNKNOWN3,
UNKNOWN4,
UNKNOWN5,
UNKNOWN6,
UNKNOWN7,
UNKNOWN8,
UNKNOWN9,
UNKNOWN10,
UNKNOWN11,
UNKNOWN12,
UNKNOWN13,
UINT8,
INT8
};
FASTDEPLOY_DECL std::string Str(const FDDataType& fdt);
FASTDEPLOY_DECL int32_t FDDataTypeSize(const FDDataType& data_dtype);
template <typename PlainType>
struct FASTDEPLOY_DECL TypeToDataType {
static const FDDataType dtype;
};
} // namespace fastdeploy
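A short sketch of how the type helpers above are typically combined when sizing a buffer; the function name and element count are illustrative assumptions.
// Sketch: map a C++ element type to its FDDataType and compute the byte
// size of a buffer holding `num_elements` of that type.
#include <cstddef>
#include "fastdeploy/core/fd_type.h"

size_t BufferBytesSketch(size_t num_elements) {
  using namespace fastdeploy;
  FDDataType dt = TypeToDataType<float>::dtype;  // FDDataType::FP32
  int element_size = FDDataTypeSize(dt);         // sizeof(float)
  return num_elements * static_cast<size_t>(element_size);
}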

View File

@@ -1,145 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
bool FastDeployModel::InitRuntime() {
FDASSERT(
CheckModelFormat(runtime_option.model_file, runtime_option.model_format),
"ModelFormatCheck Failed.");
if (runtime_initialized_) {
FDERROR << "The model is already initialized, cannot be initliazed again."
<< std::endl;
return false;
}
if (runtime_option.backend != Backend::UNKNOWN) {
if (runtime_option.backend == Backend::ORT) {
if (!IsBackendAvailable(Backend::ORT)) {
FDERROR
<< "Backend::ORT is not complied with current FastDeploy library."
<< std::endl;
return false;
}
} else if (runtime_option.backend == Backend::TRT) {
if (!IsBackendAvailable(Backend::TRT)) {
FDERROR
<< "Backend::TRT is not complied with current FastDeploy library."
<< std::endl;
return false;
}
} else if (runtime_option.backend == Backend::PDINFER) {
if (!IsBackendAvailable(Backend::PDINFER)) {
FDERROR << "Backend::PDINFER is not compiled with current FastDeploy "
"library."
<< std::endl;
return false;
}
} else {
FDERROR
<< "Only support Backend::ORT / Backend::TRT / Backend::PDINFER now."
<< std::endl;
return false;
}
runtime_ = utils::make_unique<Runtime>();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
if (runtime_option.device == Device::CPU) {
return CreateCpuBackend();
} else if (runtime_option.device == Device::GPU) {
#ifdef WITH_GPU
return CreateGpuBackend();
#else
FDERROR << "The compiled FastDeploy library doesn't support GPU now."
<< std::endl;
return false;
#endif
}
FDERROR << "Only support CPU/GPU now." << std::endl;
return false;
}
bool FastDeployModel::CreateCpuBackend() {
if (valid_cpu_backends.size() == 0) {
FDERROR << "There's no valid cpu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_cpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_cpu_backends[i])) {
continue;
}
runtime_option.backend = valid_cpu_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
return false;
}
bool FastDeployModel::CreateGpuBackend() {
if (valid_gpu_backends.size() == 0) {
FDERROR << "There's no valid gpu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_gpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_gpu_backends[i])) {
continue;
}
runtime_option.backend = valid_gpu_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Cannot find an available gpu backend to load this model."
<< std::endl;
return false;
}
bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors) {
return runtime_->Infer(input_tensors, output_tensors);
}
void FastDeployModel::EnableDebug() {
#ifdef FASTDEPLOY_DEBUG
debug_ = true;
#else
FDWARNING << "The compile FastDeploy is not with -DENABLE_DEBUG=ON, so "
"cannot enable debug mode."
<< std::endl;
debug_ = false;
#endif
}
bool FastDeployModel::DebugEnabled() { return debug_; }
} // namespace fastdeploy

View File

@@ -1,67 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/fastdeploy_runtime.h"
namespace fastdeploy {
class FASTDEPLOY_DECL FastDeployModel {
public:
virtual std::string ModelName() const { return "NameUndefined"; }
virtual bool InitRuntime();
virtual bool CreateCpuBackend();
virtual bool CreateGpuBackend();
virtual bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
RuntimeOption runtime_option;
std::vector<Backend> valid_cpu_backends = {Backend::ORT};
std::vector<Backend> valid_gpu_backends = {Backend::ORT};
std::vector<Backend> valid_external_backends;
bool initialized = false;
virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
virtual TensorInfo InputInfoOfRuntime(int index) {
return runtime_->GetInputInfo(index);
}
virtual TensorInfo OutputInfoOfRuntime(int index) {
return runtime_->GetOutputInfo(index);
}
virtual bool Initialized() const {
return runtime_initialized_ && initialized;
}
virtual void EnableDebug();
virtual bool DebugEnabled();
private:
std::unique_ptr<Runtime> runtime_;
bool runtime_initialized_ = false;
bool debug_ = false;
};
#define TIMERECORD_START(id) \
TimeCounter tc_##id; \
tc_##id.Start();
#define TIMERECORD_END(id, prefix) \
if (DebugEnabled()) { \
tc_##id.End(); \
FDLogger() << __FILE__ << "(" << __LINE__ << "):" << __FUNCTION__ << " " \
<< prefix << " duration = " << tc_##id.Duration() << "s." \
<< std::endl; \
}
} // namespace fastdeploy
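A hypothetical sketch of how a concrete model wrapper builds on the FastDeployModel base class above; the class name and backend lists are assumptions for illustration only.
// Sketch: a derived model sets its runtime option and candidate backends,
// then lets the base class pick an available backend in InitRuntime().
#include <string>
#include "fastdeploy/fastdeploy_model.h"

namespace fastdeploy {
class DemoModel : public FastDeployModel {
 public:
  DemoModel(const std::string& model_file, const std::string& params_file) {
    runtime_option.SetModelPath(model_file, params_file, "paddle");
    valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
    initialized = InitRuntime();
  }
  std::string ModelName() const override { return "DemoModel"; }
};
}  // namespace fastdeploy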

View File

@@ -1,365 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_runtime.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h"
#endif
#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif
#ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
std::string Str(const Backend& b) {
if (b == Backend::ORT) {
return "Backend::ORT";
} else if (b == Backend::TRT) {
return "Backend::TRT";
} else if (b == Backend::PDINFER) {
return "Backend::PDINFER";
}
return "UNKNOWN-Backend";
}
std::string Str(const Frontend& f) {
if (f == Frontend::PADDLE) {
return "Frontend::PADDLE";
} else if (f == Frontend::ONNX) {
return "Frontend::ONNX";
}
return "UNKNOWN-Frontend";
}
bool CheckModelFormat(const std::string& model_file,
const Frontend& model_format) {
if (model_format == Frontend::PADDLE) {
if (model_file.size() < 8 ||
model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
FDERROR << "With model format of Frontend::PADDLE, the model file "
"should ends with `.pdmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == Frontend::ONNX) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".onnx") {
FDERROR << "With model format of Frontend::ONNX, the model file "
"should ends with `.onnx`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDERROR << "Only support model format with frontend Frontend::PADDLE / "
"Frontend::ONNX."
<< std::endl;
return false;
}
return true;
}
Frontend GuessModelFormat(const std::string& model_file) {
if (model_file.size() > 8 &&
model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
FDLogger() << "Model Format: PaddlePaddle." << std::endl;
return Frontend::PADDLE;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".onnx") {
FDLogger() << "Model Format: ONNX." << std::endl;
return Frontend::ONNX;
}
FDERROR << "Cannot guess which model format you are using, please set "
"RuntimeOption::model_format manually."
<< std::endl;
return Frontend::PADDLE;
}
void RuntimeOption::SetModelPath(const std::string& model_path,
const std::string& params_path,
const std::string& _model_format) {
if (_model_format == "paddle") {
model_file = model_path;
params_file = params_path;
model_format = Frontend::PADDLE;
} else if (_model_format == "onnx") {
model_file = model_path;
model_format = Frontend::ONNX;
} else {
FDASSERT(false, "The model format only can be 'paddle' or 'onnx'.");
}
}
void RuntimeOption::UseGpu(int gpu_id) {
#ifdef WITH_GPU
device = Device::GPU;
device_id = gpu_id;
#else
FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU."
<< std::endl;
device = Device::CPU;
#endif
}
void RuntimeOption::UseCpu() { device = Device::CPU; }
void RuntimeOption::SetCpuThreadNum(int thread_num) {
FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
cpu_thread_num = thread_num;
}
// use paddle inference backend
void RuntimeOption::UsePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
backend = Backend::PDINFER;
#else
FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference.");
#endif
}
// use onnxruntime backend
void RuntimeOption::UseOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
backend = Backend::ORT;
#else
FDASSERT(false, "The FastDeploy didn't compile with OrtBackend.");
#endif
}
void RuntimeOption::UseTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
backend = Backend::TRT;
#else
FDASSERT(false, "The FastDeploy didn't compile with TrtBackend.");
#endif
}
void RuntimeOption::EnablePaddleMKLDNN() { pd_enable_mkldnn = true; }
void RuntimeOption::DisablePaddleMKLDNN() { pd_enable_mkldnn = false; }
void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
FDASSERT(size > 0, "Parameter size must greater than 0.");
pd_mkldnn_cache_size = size;
}
void RuntimeOption::SetTrtInputShape(const std::string& input_name,
const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape,
const std::vector<int32_t>& max_shape) {
trt_min_shape[input_name].clear();
trt_max_shape[input_name].clear();
trt_opt_shape[input_name].clear();
trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
if (opt_shape.size() == 0) {
trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
} else {
trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
}
if (max_shape.size() == 0) {
trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
} else {
trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
}
}
void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
trt_serialize_file = cache_file_path;
}
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
if (option.model_format == Frontend::AUTOREC) {
option.model_format = GuessModelFormat(_option.model_file);
}
if (option.backend == Backend::UNKNOWN) {
if (IsBackendAvailable(Backend::ORT)) {
option.backend = Backend::ORT;
} else if (IsBackendAvailable(Backend::PDINFER)) {
option.backend = Backend::PDINFER;
} else {
FDERROR << "Please define backend in RuntimeOption, current it's "
"Backend::UNKNOWN."
<< std::endl;
return false;
}
}
if (option.backend == Backend::ORT) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
CreateOrtBackend();
} else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend();
} else if (option.backend == Backend::PDINFER) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
FDASSERT(
option.model_format == Frontend::PADDLE,
"Backend::PDINFER only supports model format of Frontend::PADDLE.");
CreatePaddleBackend();
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."
<< std::endl;
return false;
}
return true;
}
TensorInfo Runtime::GetInputInfo(int index) {
return backend_->GetInputInfo(index);
}
TensorInfo Runtime::GetOutputInfo(int index) {
return backend_->GetOutputInfo(index);
}
bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors) {
return backend_->Infer(input_tensors, output_tensors);
}
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
auto pd_option = PaddleBackendOption();
pd_option.enable_mkldnn = option.pd_enable_mkldnn;
pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
pd_option.use_gpu = (option.device == Device::GPU) ? true : false;
pd_option.gpu_id = option.device_id;
pd_option.cpu_thread_num = option.cpu_thread_num;
FDASSERT(option.model_format == Frontend::PADDLE,
"PaddleBackend only support model format of Frontend::PADDLE.");
backend_ = utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
#else
FDASSERT(false,
"PaddleBackend is not available, please compiled with "
"ENABLE_PADDLE_BACKEND=ON.");
#endif
}
void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption();
ort_option.graph_optimization_level = option.ort_graph_opt_level;
ort_option.intra_op_num_threads = option.cpu_thread_num;
ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
ort_option.execution_mode = option.ort_execution_mode;
ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
ort_option.gpu_id = option.device_id;
// TODO(jiangjiajun): inside usage, maybe remove this later
ort_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
ort_option.custom_op_info_ = option.custom_op_info_;
FDASSERT(option.model_format == Frontend::PADDLE ||
option.model_format == Frontend::ONNX,
"OrtBackend only support model format of Frontend::PADDLE / "
"Frontend::ONNX.");
backend_ = utils::make_unique<OrtBackend>();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
if (option.model_format == Frontend::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
}
#else
FDASSERT(false,
"OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON.");
#endif
}
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
auto trt_option = TrtBackendOption();
trt_option.gpu_id = option.device_id;
trt_option.enable_fp16 = option.trt_enable_fp16;
trt_option.enable_int8 = option.trt_enable_int8;
trt_option.max_batch_size = option.trt_max_batch_size;
trt_option.max_workspace_size = option.trt_max_workspace_size;
trt_option.max_shape = option.trt_max_shape;
trt_option.min_shape = option.trt_min_shape;
trt_option.opt_shape = option.trt_opt_shape;
trt_option.serialize_file = option.trt_serialize_file;
// TODO(jiangjiajun): inside usage, maybe remove this later
trt_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
trt_option.custom_op_info_ = option.custom_op_info_;
FDASSERT(option.model_format == Frontend::PADDLE ||
option.model_format == Frontend::ONNX,
"TrtBackend only support model format of Frontend::PADDLE / "
"Frontend::ONNX.");
backend_ = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
if (option.model_format == Frontend::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
}
#else
FDASSERT(false,
"TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON.");
#endif
}
} // namespace fastdeploy

View File

@@ -1,159 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER };
// AUTOREC will decide which Frontend to use
// according to the name of the model file
enum FASTDEPLOY_DECL Frontend { AUTOREC, PADDLE, ONNX };
FASTDEPLOY_DECL std::string Str(const Backend& b);
FASTDEPLOY_DECL std::string Str(const Frontend& f);
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool CheckModelFormat(const std::string& model_file,
const Frontend& model_format);
Frontend GuessModelFormat(const std::string& model_file);
struct FASTDEPLOY_DECL RuntimeOption {
  // set the path of the model file and the params file
  // for onnx, only model_file needs to be set, but model_format
  // still needs to be specified
  // model_format supports 'paddle' / 'onnx' now
void SetModelPath(const std::string& model_path,
const std::string& params_path = "",
const std::string& _model_format = "paddle");
  // set model inference to run on CPU
  void UseCpu();
  // set model inference to run on GPU
void UseGpu(int gpu_id = 0);
  // set the number of threads for inference on CPU
void SetCpuThreadNum(int thread_num);
// use paddle inference backend
void UsePaddleBackend();
// use onnxruntime backend
void UseOrtBackend();
// use tensorrt backend
void UseTrtBackend();
  // enable mkldnn while using paddle inference on CPU
  void EnablePaddleMKLDNN();
  // disable mkldnn while using paddle inference on CPU
  void DisablePaddleMKLDNN();
  // set the size of the cached shapes while mkldnn is enabled with the paddle
  // inference backend
void SetPaddleMKLDNNCacheSize(int size);
  // set tensorrt input shapes while the inputs of the model contain dynamic
  // shapes
  // min_shape: the minimum shape
  // opt_shape: the most common shape during inference, empty by default
  // max_shape: the maximum shape, empty by default
  // if opt_shape and max_shape are empty, they will be kept the same as
  // min_shape, which means the shape will be fixed to min_shape during
  // inference
void SetTrtInputShape(
const std::string& input_name, const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
const std::vector<int32_t>& max_shape = std::vector<int32_t>());
  // enable half precision while using the tensorrt backend
  void EnableTrtFP16();
  // disable half precision, change back to full precision (float32)
void DisableTrtFP16();
void SetTrtCacheFile(const std::string& cache_file_path);
Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess
int cpu_thread_num = 8;
int device_id = 0;
Device device = Device::CPU;
// ======Only for ORT Backend========
// -1 means use default value by ort
// 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
// ORT_ENABLE_ALL
int ort_graph_opt_level = -1;
int ort_inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
int ort_execution_mode = -1;
// ======Only for Paddle Backend=====
bool pd_enable_mkldnn = true;
int pd_mkldnn_cache_size = 1;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
std::string trt_serialize_file = "";
bool trt_enable_fp16 = false;
bool trt_enable_int8 = false;
size_t trt_max_batch_size = 32;
size_t trt_max_workspace_size = 1 << 30;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Frontend model_format = Frontend::AUTOREC; // format of input model
  // internal parameters, only for internal usage
// remove multiclass_nms in Paddle2ONNX
bool remove_multiclass_nms_ = false;
// for Paddle2ONNX to export custom operators
std::map<std::string, std::string> custom_op_info_;
};
struct FASTDEPLOY_DECL Runtime {
public:
// explicit Runtime(const RuntimeOption& _option = RuntimeOption());
bool Init(const RuntimeOption& _option);
bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
void CreateOrtBackend();
void CreatePaddleBackend();
void CreateTrtBackend();
int NumInputs() { return backend_->NumInputs(); }
int NumOutputs() { return backend_->NumOutputs(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
RuntimeOption option;
private:
std::unique_ptr<BaseBackend> backend_;
};
} // namespace fastdeploy
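A minimal end-to-end sketch of the RuntimeOption/Runtime API above; the model path, backend choice and input shape are placeholder assumptions.
// Sketch: configure a runtime for an ONNX model on CPU with the ORT
// backend, then run a single inference with one input tensor.
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/fastdeploy_runtime.h"

bool RunOnceSketch() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.onnx", "", "onnx");  // placeholder path
  option.UseCpu();
  option.SetCpuThreadNum(4);
  option.UseOrtBackend();

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return false;

  // Name the input after the model's first input and allocate a buffer
  // with a placeholder shape.
  fastdeploy::TensorInfo info = runtime.GetInputInfo(0);
  std::vector<fastdeploy::FDTensor> inputs(1);
  inputs[0].Allocate({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                     info.name);

  std::vector<fastdeploy::FDTensor> outputs;
  return runtime.Infer(inputs, &outputs);
}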

View File

@@ -1,32 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/function/eigen.h"
namespace fastdeploy {
std::shared_ptr<EigenDeviceWrapper> EigenDeviceWrapper::instance_ = nullptr;
std::shared_ptr<EigenDeviceWrapper> EigenDeviceWrapper::GetInstance() {
if (instance_ == nullptr) {
instance_ = std::make_shared<EigenDeviceWrapper>();
}
return instance_;
}
const Eigen::DefaultDevice* EigenDeviceWrapper::GetDevice() const {
return &device_;
}
} // namespace fastdeploy

View File

@@ -1,109 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <memory>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace fastdeploy {
// EigenDim converts shape into Eigen::DSizes.
template <int D>
struct EigenDim {
using Type = Eigen::DSizes<Eigen::DenseIndex, D>;
static Type From(const std::vector<int64_t>& dims) {
Type ret;
for (int64_t d = 0; d < dims.size(); d++) {
ret[d] = dims[d];
}
return ret;
}
};
// Interpret FDTensor as EigenTensor and EigenConstTensor.
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenTensor {
using Type = Eigen::TensorMap<Eigen::Tensor<T, D, MajorType, IndexType>>;
using ConstType =
Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;
static Type From(FDTensor& tensor,
const std::vector<int64_t>& dims) { // NOLINT
return Type(reinterpret_cast<T*>(tensor.Data()), EigenDim<D>::From(dims));
}
static Type From(FDTensor& tensor) { // NOLINT
return From(tensor, tensor.shape);
} // NOLINT
static ConstType From(const FDTensor& tensor,
const std::vector<int64_t>& dims) {
return ConstType(reinterpret_cast<const T*>(tensor.Data()),
EigenDim<D>::From(dims));
}
static ConstType From(const FDTensor& tensor) {
return From(tensor, tensor.shape);
}
};
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenScalar {
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
using Type = Eigen::TensorMap<
Eigen::TensorFixedSize<T, Eigen::Sizes<>, MajorType, IndexType>>;
using ConstType = Eigen::TensorMap<
Eigen::TensorFixedSize<const T, Eigen::Sizes<>, MajorType, IndexType>>;
static Type From(FDTensor& tensor) {
return Type(reinterpret_cast<T*>(tensor.Data()));
} // NOLINT
static ConstType From(const FDTensor& tensor) {
return ConstType(reinterpret_cast<const T*>(tensor.Data()));
}
};
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
// Flatten reshapes a Tensor into an EigenVector.
static typename EigenVector::Type Flatten(FDTensor& tensor) { // NOLINT
return EigenVector::From(tensor, {tensor.Numel()});
}
static typename EigenVector::ConstType Flatten(
const FDTensor& tensor) { // NOLINT
return EigenVector::From(tensor, {tensor.Numel()});
}
};
class EigenDeviceWrapper {
public:
static std::shared_ptr<EigenDeviceWrapper> GetInstance();
const Eigen::DefaultDevice* GetDevice() const;
private:
Eigen::DefaultDevice device_;
static std::shared_ptr<EigenDeviceWrapper> instance_;
};
} // namespace fastdeploy
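A small sketch of mapping an FDTensor onto Eigen expressions with the helpers above; the shape and values are illustrative.
// Sketch: create rank-2 and rank-1 views over the same FDTensor buffer
// and update it in place through the Eigen default device.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/eigen.h"

void EigenMapSketch() {
  using namespace fastdeploy;
  FDTensor t;
  t.Allocate({2, 3}, FDDataType::FP32);
  auto x = EigenTensor<float, 2>::From(t);     // rank-2 map over t's buffer
  x.setConstant(1.0f);                         // writes through to t's data
  auto flat = EigenVector<float>::Flatten(t);  // rank-1 view of the same data
  const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
  flat.device(dev) = flat * 2.0f;              // elementwise update in place
}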

View File

@@ -1,246 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <set>
#include "fastdeploy/function/eigen.h"
#include "fastdeploy/function/reduce.h"
#include "fastdeploy/function/reduce_functor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
#ifdef ENABLE_FDTENSOR_FUNC
template <typename T, size_t D, size_t R_D, typename Functor>
void ReduceFunctor(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim) {
auto x = EigenTensor<T, D>::From(input);
auto x_rank = static_cast<int>(x.dimensions().size());
auto reduce_dim = Eigen::array<int, R_D>();
std::vector<int64_t> dims_ref = dims;
auto out_dims = input.shape;
for (size_t i = 0; i < dims_ref.size(); ++i) {
if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i];
reduce_dim[i] = dims_ref[i];
out_dims[dims_ref[i]] = 1;
}
auto origin_output_dims = out_dims;
output->Allocate(origin_output_dims, TypeToDataType<T>::dtype);
// construct the squeezed output tensor
if (x_rank > 1) {
const int kDelFlag = -2;
for (size_t i = 0; i < dims_ref.size(); ++i) {
out_dims[dims_ref[i]] = kDelFlag;
}
out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag),
out_dims.end());
}
auto& place = *EigenDeviceWrapper::GetInstance()->GetDevice();
Functor functor;
if (D == 1) {
auto out = EigenScalar<T>::From(*output);
functor(place, &x, &out, reduce_dim);
} else {
auto out = EigenTensor<T, (D - R_D)>::From(*output, out_dims);
functor(place, &x, &out, reduce_dim);
if (!keep_dim) {
output->shape = std::move(out_dims);
}
}
}
#define HANDLE_REDUCE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceFunctor<OutT, NDIM, RDIM, Functor>(input, output, dims, keep_dim); \
}
inline void GetShuffledDim(const std::vector<int64_t>& src_dims,
std::vector<int64_t>* dst_dims,
const std::vector<int64_t>& reduced_dims,
std::vector<int>* perm_axis) {
// check if it's a reduced dim
std::vector<bool> src_dims_check(src_dims.size(), false);
size_t src_size = src_dims.size();
size_t reduce_size = reduced_dims.size();
std::vector<int64_t> regular_reduced_dims = reduced_dims;
for (size_t i = 0; i < regular_reduced_dims.size(); i++) {
if (regular_reduced_dims[i] < 0) {
regular_reduced_dims[i] = src_size + regular_reduced_dims[i];
}
}
for (size_t i = 0; i < reduce_size; ++i) {
dst_dims->at(src_size - reduce_size + i) =
src_dims[regular_reduced_dims[i]];
(*perm_axis)[src_size - reduce_size + i] = regular_reduced_dims[i];
src_dims_check[regular_reduced_dims[i]] = true;
}
size_t offset = 0;
for (size_t i = 0; i < src_dims_check.size(); ++i) {
bool is_reduced = src_dims_check[i];
if (!is_reduced) {
(*perm_axis)[offset] = i;
dst_dims->at(offset++) = src_dims[i];
}
}
}
template <typename OutT>
void GetShuffledInput(const FDTensor& input, FDTensor* shuffled_input,
const std::vector<int64_t>& dims) {
auto shuffled_dims = input.shape;
std::vector<int> perm_axis(input.shape.size());
GetShuffledDim(input.shape, &shuffled_dims, dims, &perm_axis);
shuffled_input->Allocate(shuffled_dims, input.dtype);
// TODO(zhoushunjie) : Need to implement trans function
// phi::funcs::TransposeNormal<DeviceContext, OutT> trans;
// trans(dev_ctx, input, shuffled_input, perm_axis);
}
//////////////// HandleLargeDim
template <typename OutT, typename Functor>
void HandleLargeDim(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim) {
// shuffle the reduced dim to the end
FDTensor shuffled_input;
GetShuffledInput<OutT>(input, &shuffled_input, dims);
// transpose to 2D tensor whose shape is {unreduced, reduced}.
const int64_t unreduced = output->Numel();
const int64_t reduced = shuffled_input.Numel() / unreduced;
shuffled_input.Allocate({unreduced, reduced}, TypeToDataType<OutT>::dtype);
auto output_dim = output->shape;
output->Allocate({unreduced}, TypeToDataType<OutT>::dtype);
ReduceFunctor<OutT, 2, 1, Functor>(shuffled_input, output, {1}, keep_dim);
output->shape = output_dim;
}
////////////// ReduceKernel
template <typename OutT, typename Functor>
void ReduceKernelImpl(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim,
bool reduce_all) {
output->Allocate({1}, TypeToDataType<OutT>::dtype);
const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
if (reduce_all) {
// Flatten and reduce 1-D tensor
auto x = EigenVector<OutT>::Flatten(input);
auto out = EigenScalar<OutT>::From(*output);
auto reduce_dim = Eigen::array<int, 1>({{0}});
Functor functor;
functor(dev, &x, &out, reduce_dim);
} else {
int ndim = input.shape.size();
int rdim = dims.size();
if (ndim > 3) {
HandleLargeDim<OutT, Functor>(input, output, dims, keep_dim);
} else {
HANDLE_REDUCE_DIM(4, 3);
HANDLE_REDUCE_DIM(4, 2);
HANDLE_REDUCE_DIM(4, 1);
HANDLE_REDUCE_DIM(3, 2);
HANDLE_REDUCE_DIM(3, 1);
HANDLE_REDUCE_DIM(2, 1);
HANDLE_REDUCE_DIM(1, 1);
}
}
}
template <typename OutT, typename Functor>
void BoolReduceKernel(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim,
bool reduce_all) {
  // If dims covers all dimensions, set reduce_all to true
const auto& input_dim_size = input.shape.size();
std::set<int> dims_set(dims.begin(), dims.end());
bool full_dim = true;
for (auto i = 0; i < input_dim_size; i++) {
if (dims_set.find(i) == dims_set.end()) {
full_dim = false;
break;
}
}
reduce_all = (reduce_all || full_dim);
ReduceKernelImpl<bool, Functor>(input, output, dims, keep_dim, reduce_all);
}
template <typename Functor>
void Reduce(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
  // If dims covers all dimensions, set reduce_all to true
const int& input_dim_size = x.shape.size();
std::set<int> dims_set(dims.begin(), dims.end());
bool full_dim = true;
for (int i = 0; i < input_dim_size; ++i) {
if (dims_set.find(i) == dims_set.end() &&
dims_set.find(i - input_dim_size) == dims_set.end()) {
full_dim = false;
break;
}
}
reduce_all = (reduce_all || full_dim);
FD_VISIT_ALL_TYPES(x.dtype, "ReduceKernelImpl", ([&] {
ReduceKernelImpl<data_t, Functor>(x, out, dims, keep_dim,
reduce_all);
}));
}
void Max(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<MaxFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Min(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<MinFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Sum(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<SumFunctor>(x, out, dims, keep_dim, reduce_all);
}
void All(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
BoolReduceKernel<bool, AllFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Any(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
BoolReduceKernel<bool, AnyFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Mean(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<MeanFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Prod(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<ProdFunctor>(x, out, dims, keep_dim, reduce_all);
}
#endif
} // namespace fastdeploy
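A worked example of what GetShuffledDim above computes for a concrete case; since the helper is file-local, this check is only a sketch that assumes it runs in the same translation unit and that ENABLE_FDTENSOR_FUNC is defined.
// Sketch: for src_dims = {2, 3, 4, 5} and reduced_dims = {1, 3}, unreduced
// axes move to the front and reduced axes to the back, so HandleLargeDim can
// view the data as a {2 * 4, 3 * 5} matrix and reduce its trailing axis.
#include <cassert>
#include <cstdint>
#include <vector>

void ShuffledDimSketch() {
  std::vector<int64_t> src = {2, 3, 4, 5};
  std::vector<int64_t> dst = src;  // filled in place, same rank
  std::vector<int> perm(src.size());
  fastdeploy::GetShuffledDim(src, &dst, {1, 3}, &perm);
  assert((dst == std::vector<int64_t>{2, 4, 3, 5}));
  assert((perm == std::vector<int>{0, 2, 1, 3}));
}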

View File

@@ -1,100 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
#ifdef ENABLE_FDTENSOR_FUNC
/** Execute the maximum operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Max(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the minimum operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Min(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the sum operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Sum(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the all operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void All(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the any operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Any(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the mean operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Mean(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the product operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Prod(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
#endif
} // namespace fastdeploy
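A brief usage sketch of the reduction API documented above; the shape and values are illustrative, and the build is assumed to enable ENABLE_FDTENSOR_FUNC.
// Sketch: reduce a {2, 3} float tensor along axis 1 with and without
// keeping the reduced dimension.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/reduce.h"

void ReduceSketch() {
  using namespace fastdeploy;
  FDTensor x;
  x.Allocate({2, 3}, FDDataType::FP32);
  float* data = reinterpret_cast<float*>(x.MutableData());
  for (int i = 0; i < x.Numel(); ++i) data[i] = static_cast<float>(i);

  FDTensor row_sum;
  Sum(x, &row_sum, {1});                     // row_sum.shape == {2}
  FDTensor row_max;
  Max(x, &row_max, {1}, /*keep_dim=*/true);  // row_max.shape == {2, 1}
}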

View File

@@ -1,76 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/function/eigen.h"
namespace fastdeploy {
//////// Max Functor ///////
struct MaxFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->maximum(dim);
}
};
//////// Min Functor ///////
struct MinFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->minimum(dim);
}
};
//////// Sum Functor ///////
struct SumFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->sum(dim);
}
};
//////// All Functor ///////
struct AllFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->all(dim);
}
};
//////// Any Functor ///////
struct AnyFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->any(dim);
}
};
//////// Mean Functor ///////
struct MeanFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->mean(dim);
}
};
//////// Prod Functor ///////
struct ProdFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->prod(dim);
}
};
} // namespace fastdeploy
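For orientation, a sketch of how one of the functors above is invoked directly on Eigen maps; it mirrors the call pattern used by ReduceFunctor in reduce.cc, with illustrative shapes.
// Sketch: apply SumFunctor to a rank-2 map, reducing the trailing axis
// into a rank-1 map.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/eigen.h"
#include "fastdeploy/function/reduce_functor.h"

void SumFunctorSketch() {
  using namespace fastdeploy;
  FDTensor in, out;
  in.Allocate({2, 3}, FDDataType::FP32);
  out.Allocate({2}, FDDataType::FP32);
  auto x = EigenTensor<float, 2>::From(in);
  auto y = EigenTensor<float, 1>::From(out);
  Eigen::array<int, 1> reduce_dim = {{1}};  // reduce along the last axis
  const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
  SumFunctor()(dev, &x, &y, reduce_dim);    // y.device(dev) = x.sum(dim)
}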

View File

@@ -1,35 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
#include "fastdeploy/fastdeploy_model.h"
namespace fastdeploy {
void BindFDModel(pybind11::module& m) {
pybind11::class_<FastDeployModel>(m, "FastDeployModel")
.def(pybind11::init<>(), "Default Constructor")
.def("model_name", &FastDeployModel::ModelName)
.def("num_inputs_of_runtime", &FastDeployModel::NumInputsOfRuntime)
.def("num_outputs_of_runtime", &FastDeployModel::NumOutputsOfRuntime)
.def("input_info_of_runtime", &FastDeployModel::InputInfoOfRuntime)
.def("output_info_of_runtime", &FastDeployModel::OutputInfoOfRuntime)
.def("initialized", &FastDeployModel::Initialized)
.def_readwrite("runtime_option", &FastDeployModel::runtime_option)
.def_readwrite("valid_cpu_backends", &FastDeployModel::valid_cpu_backends)
.def_readwrite("valid_gpu_backends",
&FastDeployModel::valid_gpu_backends);
}
} // namespace fastdeploy

View File

@@ -1,134 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRuntime(pybind11::module& m) {
pybind11::class_<RuntimeOption>(m, "RuntimeOption")
.def(pybind11::init())
.def("set_model_path", &RuntimeOption::SetModelPath)
.def("use_gpu", &RuntimeOption::UseGpu)
.def("use_cpu", &RuntimeOption::UseCpu)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
.def("enable_paddle_mkldnn", &RuntimeOption::EnablePaddleMKLDNN)
.def("disable_paddle_mkldnn", &RuntimeOption::DisablePaddleMKLDNN)
.def("set_paddle_mkldnn_cache_size",
&RuntimeOption::SetPaddleMKLDNNCacheSize)
.def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
.def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
.def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
.def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
.def_readwrite("model_file", &RuntimeOption::model_file)
.def_readwrite("params_file", &RuntimeOption::params_file)
.def_readwrite("model_format", &RuntimeOption::model_format)
.def_readwrite("backend", &RuntimeOption::backend)
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
.def_readwrite("device_id", &RuntimeOption::device_id)
.def_readwrite("device", &RuntimeOption::device)
.def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
.def_readwrite("ort_inter_op_num_threads",
&RuntimeOption::ort_inter_op_num_threads)
.def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
.def_readwrite("trt_max_workspace_size",
&RuntimeOption::trt_max_workspace_size);
pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
.def_readwrite("shape", &TensorInfo::shape)
.def_readwrite("dtype", &TensorInfo::dtype);
pybind11::class_<Runtime>(m, "Runtime")
.def(pybind11::init())
.def("init", &Runtime::Init)
.def("infer",
[](Runtime& self, std::map<std::string, pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
int index = 0;
for (auto iter = data.begin(); iter != data.end(); ++iter) {
inputs[index].dtype =
NumpyDataTypeToFDDataType(iter->second.dtype());
inputs[index].shape.insert(
inputs[index].shape.begin(), iter->second.shape(),
iter->second.shape() + iter->second.ndim());
             // TODO(jiangjiajun) Maybe skipping the memory copy is a better
             // choice, use SetExternalData
inputs[index].data.resize(iter->second.nbytes());
memcpy(inputs[index].data.data(), iter->second.mutable_data(),
iter->second.nbytes());
inputs[index].name = iter->first;
index += 1;
}
std::vector<FDTensor> outputs(self.NumOutputs());
self.Infer(inputs, &outputs);
std::vector<pybind11::array> results;
results.reserve(outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(
pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;
})
.def("num_inputs", &Runtime::NumInputs)
.def("num_outputs", &Runtime::NumOutputs)
.def("get_input_info", &Runtime::GetInputInfo)
.def("get_output_info", &Runtime::GetOutputInfo)
.def_readonly("option", &Runtime::option);
pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
"Backend for inference.")
.value("UNKOWN", Backend::UNKNOWN)
.value("ORT", Backend::ORT)
.value("TRT", Backend::TRT)
.value("PDINFER", Backend::PDINFER);
pybind11::enum_<Frontend>(m, "Frontend", pybind11::arithmetic(),
"Frontend for inference.")
.value("PADDLE", Frontend::PADDLE)
.value("ONNX", Frontend::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.")
.value("CPU", Device::CPU)
.value("GPU", Device::GPU);
pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
"Data type of FastDeploy.")
.value("BOOL", FDDataType::BOOL)
.value("INT8", FDDataType::INT8)
.value("INT16", FDDataType::INT16)
.value("INT32", FDDataType::INT32)
.value("INT64", FDDataType::INT64)
.value("FP32", FDDataType::FP32)
.value("FP64", FDDataType::FP64)
.value("UINT8", FDDataType::UINT8);
m.def("get_available_backends", []() { return GetAvailableBackends(); });
}
} // namespace fastdeploy

View File

@@ -1,127 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRuntime(pybind11::module&);
void BindFDModel(pybind11::module&);
void BindVision(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
pybind11::dtype dt;
if (fd_dtype == FDDataType::INT32) {
dt = pybind11::dtype::of<int32_t>();
} else if (fd_dtype == FDDataType::INT64) {
dt = pybind11::dtype::of<int64_t>();
} else if (fd_dtype == FDDataType::FP32) {
dt = pybind11::dtype::of<float>();
} else if (fd_dtype == FDDataType::FP64) {
dt = pybind11::dtype::of<double>();
} else if (fd_dtype == FDDataType::UINT8) {
dt = pybind11::dtype::of<uint8_t>();
} else {
FDASSERT(false, "The function doesn't support data type of " +
Str(fd_dtype) + ".");
}
return dt;
}
FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) {
if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
return FDDataType::INT32;
} else if (np_dtype.is(pybind11::dtype::of<int64_t>())) {
return FDDataType::INT64;
} else if (np_dtype.is(pybind11::dtype::of<float>())) {
return FDDataType::FP32;
} else if (np_dtype.is(pybind11::dtype::of<double>())) {
return FDDataType::FP64;
} else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
return FDDataType::UINT8;
}
FDASSERT(false,
"NumpyDataTypeToFDDataType() only support "
"int32/int64/float32/float64 now.");
return FDDataType::FP32;
}
void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
bool share_buffer) {
tensor->dtype = NumpyDataTypeToFDDataType(pyarray.dtype());
tensor->shape.insert(tensor->shape.begin(), pyarray.shape(),
pyarray.shape() + pyarray.ndim());
if (share_buffer) {
tensor->external_data_ptr = pyarray.mutable_data();
} else {
tensor->data.resize(pyarray.nbytes());
memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
}
}
pybind11::array TensorToPyArray(const FDTensor& tensor) {
auto numpy_dtype = FDDataTypeToNumpyDataType(tensor.dtype);
auto out = pybind11::array(numpy_dtype, tensor.shape);
memcpy(out.mutable_data(), tensor.Data(), tensor.Numel() * FDDataTypeSize(tensor.dtype));
return out;
}
#ifdef ENABLE_VISION
int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) {
if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
return CV_32S;
} else if (np_dtype.is(pybind11::dtype::of<int8_t>())) {
return CV_8U;
} else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
return CV_8U;
} else if (np_dtype.is(pybind11::dtype::of<float>())) {
return CV_32F;
} else {
FDASSERT(
false,
"NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 "
"now.");
}
return CV_8U;
}
cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
FDASSERT(
pyarray.ndim() == 3,
"Require rank of array to be 3 with HWC format while converting it to "
"cv::Mat.");
int channel = *(pyarray.shape() + 2);
int height = *(pyarray.shape());
int width = *(pyarray.shape() + 1);
return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
pyarray.mutable_data());
}
#endif
PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
m.doc() =
"Make programer easier to deploy deeplearning model, save time to save "
"the world!";
BindRuntime(m);
BindFDModel(m);
#ifdef ENABLE_VISION
auto vision_module =
m.def_submodule("vision", "Vision module of FastDeploy.");
BindVision(vision_module);
#endif
}
} // namespace fastdeploy
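
A minimal round-trip sketch of the conversion helpers above (illustrative only, not part of the original sources): it wraps a small float32 numpy array into an FDTensor with PyArrayToTensor and converts it back with TensorToPyArray. The embedded-interpreter guard and the 2x2 test array are assumptions made just for this example.

#include <vector>
#include <pybind11/embed.h>            // scoped_interpreter; numpy arrays need a live Python
#include "fastdeploy/pybind/main.h"

int main() {
  pybind11::scoped_interpreter guard{};                 // start an embedded interpreter for this sketch
  std::vector<float> raw = {1.f, 2.f, 3.f, 4.f};
  pybind11::array_t<float> arr({2, 2}, raw.data());     // 2x2 float32 array (pybind11 copies the data)

  fastdeploy::FDTensor tensor;
  fastdeploy::PyArrayToTensor(arr, &tensor);            // copies the bytes into tensor.data (share_buffer=false)
  pybind11::array back = fastdeploy::TensorToPyArray(tensor);  // copies them back into a fresh numpy array
  return back.size() == 4 ? 0 : 1;
}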

View File

@@ -1,90 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <type_traits>
#include "fastdeploy/fastdeploy_runtime.h"
#ifdef ENABLE_VISION
#include "fastdeploy/vision.h"
#endif
namespace fastdeploy {
void BindBackend(pybind11::module&);
void BindVision(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);
FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype);
void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
bool share_buffer = false);
pybind11::array TensorToPyArray(const FDTensor& tensor);
#ifdef ENABLE_VISION
cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
#endif
template <typename T>
FDDataType CTypeToFDDataType() {
if (std::is_same<T, int32_t>::value) {
return FDDataType::INT32;
} else if (std::is_same<T, int64_t>::value) {
return FDDataType::INT64;
} else if (std::is_same<T, float>::value) {
return FDDataType::FP32;
} else if (std::is_same<T, double>::value) {
return FDDataType::FP64;
}
FDASSERT(false,
"CTypeToFDDataType only support int32/int64/float32/float64 now.");
return FDDataType::FP32;
}
template <typename T>
std::vector<pybind11::array> PyBackendInfer(
T& self, const std::vector<std::string>& names,
std::vector<pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
for (size_t i = 0; i < data.size(); ++i) {
// TODO(jiangjiajun) Consider using the user's memory directly here
inputs[i].dtype = NumpyDataTypeToFDDataType(data[i].dtype());
inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
data[i].shape() + data[i].ndim());
inputs[i].data.resize(data[i].nbytes());
memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
inputs[i].name = names[i];
}
std::vector<FDTensor> outputs(self.NumOutputs());
self.Infer(inputs, &outputs);
std::vector<pybind11::array> results;
results.reserve(outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;
}
} // namespace fastdeploy
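
A hedged sketch of how PyBackendInfer is meant to be used: any type that exposes NumOutputs() and Infer(inputs, &outputs) can be bound this way. EchoBackend and the module name echo_backend are hypothetical stand-ins invented for the example.

#include "fastdeploy/pybind/main.h"

// Hypothetical toy backend that just echoes its first input; it exists only
// to satisfy the NumOutputs()/Infer() contract that PyBackendInfer relies on.
struct EchoBackend {
  int NumOutputs() { return 1; }
  bool Infer(std::vector<fastdeploy::FDTensor>& inputs,
             std::vector<fastdeploy::FDTensor>* outputs) {
    (*outputs)[0] = inputs[0];   // copy the first input tensor to the output
    return true;
  }
};

PYBIND11_MODULE(echo_backend, m) {
  pybind11::class_<EchoBackend>(m, "EchoBackend")
      .def(pybind11::init<>())
      .def("infer", [](EchoBackend& self, const std::vector<std::string>& names,
                       std::vector<pybind11::array>& data) {
        // Copies the numpy inputs into FDTensors, runs Infer, copies outputs back.
        return fastdeploy::PyBackendInfer(self, names, data);
      });
}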

View File

@@ -1,19 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/config.h"
#ifdef ENABLE_TEXT
#include "fastdeploy/text/text_model.h"
#endif

View File

@@ -1,26 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
struct FASTDEPLOY_DECL TextPreprocessOption {};
struct FASTDEPLOY_DECL TextPostprocessOption {};
struct FASTDEPLOY_DECL PredictionOption {};
} // namespace text
} // namespace fastdeploy

View File

@@ -1,18 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/common/result.h"
namespace fastdeploy {
namespace text {} // namespace text
} // namespace fastdeploy

View File

@@ -1,23 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
struct FASTDEPLOY_DECL Result {};
} // namespace text
} // namespace fastdeploy

View File

@@ -1,31 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/postprocessor/postprocessor.h"
namespace fastdeploy {
namespace text {
bool Postprocessor::Decode(const std::vector<FDTensor>& model_result,
Result* decoded_result) const {
return true;
}
bool Postprocessor::DecodeBatch(const std::vector<FDTensor>& model_result,
Result* decoded_result) const {
return true;
}
} // namespace text
} // namespace fastdeploy

View File

@@ -1,34 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/text/common/result.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
class Postprocessor {
public:
virtual bool Decode(const std::vector<FDTensor>& model_result,
Result* decoded_result) const;
virtual bool DecodeBatch(const std::vector<FDTensor>& model_result,
Result* decoded_result) const;
};
} // namespace text
} // namespace fastdeploy

View File

@@ -1,32 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/preprocessor/preprocessor.h"
namespace fastdeploy {
namespace text {
bool Preprocessor::Encode(const std::string& raw_text,
std::vector<FDTensor>* encoded_tensor) const {
return true;
}
bool Preprocessor::EncodeBatch(const std::vector<std::string>& raw_texts,
std::vector<FDTensor>* encoded_tensor) const {
return true;
}
} // namespace text
} // namespace fastdeploy

View File

@@ -1,34 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
class Preprocessor {
public:
virtual bool Encode(const std::string& raw_text,
std::vector<FDTensor>* encoded_tensor) const;
virtual bool EncodeBatch(const std::vector<std::string>& raw_texts,
std::vector<FDTensor>* encoded_tensor) const;
};
} // namespace text
} // namespace fastdeploy
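
A hypothetical Preprocessor subclass, sketched here only to show what the Encode contract expects from an implementation; the class name, the tensor name "input_ids" and the byte-to-id encoding are all made up for the example.

#include <cstdint>
#include "fastdeploy/text/preprocessor/preprocessor.h"

namespace fastdeploy {
namespace text {

// Maps every byte of the input text to an int32 id in a single [1, len] tensor.
class CharPreprocessor : public Preprocessor {
 public:
  bool Encode(const std::string& raw_text,
              std::vector<FDTensor>* encoded_tensor) const override {
    encoded_tensor->resize(1);
    auto& t = (*encoded_tensor)[0];
    t.name = "input_ids";
    t.dtype = FDDataType::INT32;
    t.shape = {1, static_cast<int>(raw_text.size())};
    t.data.resize(raw_text.size() * sizeof(int32_t));
    auto* ids = reinterpret_cast<int32_t*>(t.data.data());
    for (size_t i = 0; i < raw_text.size(); ++i) {
      ids[i] = static_cast<int32_t>(static_cast<unsigned char>(raw_text[i]));
    }
    return true;
  }
};

}  // namespace text
}  // namespace fastdeploy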

View File

@@ -1,79 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/text_model.h"
#include "fastdeploy/text/common/option.h"
#include "fastdeploy/text/common/result.h"
#include "fastdeploy/text/postprocessor/postprocessor.h"
#include "fastdeploy/text/preprocessor/preprocessor.h"
namespace fastdeploy {
namespace text {
bool TextModel::Predict(const std::string& raw_text, Result* result,
const PredictionOption& option) {
// Preprocess
std::vector<FDTensor> input_tensor;
std::vector<FDTensor> output_tensor;
if (!preprocessor_->Encode(raw_text, &input_tensor)) {
FDERROR << "Failed to preprocess input data while using model:"
<< ModelName() << "." << std::endl;
return false;
}
// Inference Runtime
if (!Infer(input_tensor, &output_tensor)) {
FDERROR << "Failed to inference while using model:" << ModelName() << "."
<< std::endl;
return false;
}
// Postprocess
if (!postprocessor_->Decode(output_tensor, result)) {
FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
<< std::endl;
return false;
}
return true;
}
bool TextModel::PredictBatch(const std::vector<std::string>& raw_text_array,
Result* results, const PredictionOption& option) {
// Preprocess
std::vector<FDTensor> input_tensor;
std::vector<FDTensor> output_tensor;
if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) {
FDERROR << "Failed to preprocess input data while using model:"
<< ModelName() << "." << std::endl;
return false;
}
// Inference Runtime
if (!Infer(input_tensor, &output_tensor)) {
FDERROR << "Failed to inference while using model:" << ModelName() << "."
<< std::endl;
return false;
}
// Postprocess
if (!postprocessor_->DecodeBatch(output_tensor, results)) {
FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
<< std::endl;
return false;
}
return true;
}
} // namespace text
} // namespace fastdeploy

View File

@@ -1,51 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/unique_ptr.h"
namespace fastdeploy {
namespace text {
class Preprocessor;
class Postprocessor;
class Result;
class PredictionOption;
class FASTDEPLOY_DECL TextModel : public FastDeployModel {
public:
virtual std::string ModelName() const { return "TextModel"; }
virtual bool Predict(const std::string& raw_text, Result* result,
const PredictionOption& option);
virtual bool PredictBatch(const std::vector<std::string>& raw_text_array,
Result* result, const PredictionOption& option);
template <typename T, typename... Args>
void SetPreprocessor(Args&&... args) {
preprocessor_ = utils::make_unique<T>(std::forward<Args>(args)...);
}
template <typename T, typename... Args>
void SetPostprocessor(Args&&... args) {
postprocessor_ = utils::make_unique<T>(std::forward<Args>(args)...);
}
private:
std::unique_ptr<Preprocessor> preprocessor_;
std::unique_ptr<Postprocessor> postprocessor_;
};
} // namespace text
} // namespace fastdeploy
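
A usage sketch of the TextModel wiring above. It plugs in the stub Preprocessor/Postprocessor (a real deployment would use concrete subclasses such as the hypothetical CharPreprocessor sketched earlier), and it deliberately skips runtime/model initialization, which FastDeployModel would require before Infer can succeed.

#include <iostream>
#include "fastdeploy/text/common/option.h"
#include "fastdeploy/text/common/result.h"
#include "fastdeploy/text/postprocessor/postprocessor.h"
#include "fastdeploy/text/preprocessor/preprocessor.h"
#include "fastdeploy/text/text_model.h"

void TextModelSketch() {
  fastdeploy::text::TextModel model;   // assumes the underlying runtime was initialized elsewhere
  model.SetPreprocessor<fastdeploy::text::Preprocessor>();     // no-op stub encoder
  model.SetPostprocessor<fastdeploy::text::Postprocessor>();   // no-op stub decoder

  fastdeploy::text::Result result;
  fastdeploy::text::PredictionOption option;
  if (!model.Predict("FastDeploy makes deployment easy.", &result, option)) {
    std::cerr << "Predict failed for " << model.ModelName() << std::endl;
  }
}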

View File

@@ -1,13 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

View File

@@ -1,49 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
#include <chrono> // NOLINT
namespace fastdeploy {
class FASTDEPLOY_DECL TimeCounter {
public:
void Start() { begin_ = std::chrono::system_clock::now(); }
void End() { end_ = std::chrono::system_clock::now(); }
double Duration() {
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end_ - begin_);
return static_cast<double>(duration.count()) *
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den;
}
void PrintInfo(const std::string& prefix = "TimeCounter: ",
bool print_out = true) {
if (!print_out) {
return;
}
FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl;
}
private:
std::chrono::time_point<std::chrono::system_clock> begin_;
std::chrono::time_point<std::chrono::system_clock> end_;
};
} // namespace fastdeploy
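
A short usage sketch of TimeCounter; the include path is an assumption, and the 50 ms sleep just stands in for real work.

#include <chrono>
#include <thread>
#include "fastdeploy/utils/perf.h"   // assumed path of the header that declares TimeCounter

int main() {
  fastdeploy::TimeCounter tc;
  tc.Start();
  std::this_thread::sleep_for(std::chrono::milliseconds(50));  // stand-in for real work
  tc.End();
  tc.PrintInfo("SleepTest: ");   // prints roughly "SleepTest:  duration = 0.05s."
  return 0;
}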

View File

@@ -1,58 +0,0 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
namespace fastdeploy {
namespace utils {
// Trait to select overloads and return types for MakeUnique.
template <typename T>
struct MakeUniqueResult {
using scalar = std::unique_ptr<T>;
};
template <typename T>
struct MakeUniqueResult<T[]> {
using array = std::unique_ptr<T[]>;
};
template <typename T, size_t N>
struct MakeUniqueResult<T[N]> {
using invalid = void;
};
// make_unique<T>(...) is an early implementation of C++14 std::make_unique.
// It is designed to be 100% compatible with std::make_unique so that the
// eventual switchover will be a simple renaming operation.
template <typename T, typename... Args>
typename MakeUniqueResult<T>::scalar make_unique(Args &&... args) { // NOLINT
return std::unique_ptr<T>(
new T(std::forward<Args>(args)...)); // NOLINT(build/c++11)
}
// Overload for array of unknown bound.
// The allocation of arrays needs to use the array form of new,
// and cannot take element constructor arguments.
template <typename T>
typename MakeUniqueResult<T>::array make_unique(size_t n) {
return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
}
// Reject arrays of known bound.
template <typename T, typename... Args>
typename MakeUniqueResult<T>::invalid make_unique(Args &&... /* args */) =
delete; // NOLINT
} // namespace utils
} // namespace fastdeploy
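
A small usage sketch of the helpers above; Foo is a throwaway type defined only for the example.

#include "fastdeploy/utils/unique_ptr.h"

struct Foo {
  Foo(int a, int b) : sum(a + b) {}
  int sum;
};

int main() {
  // Scalar overload: forwards the constructor arguments.
  auto foo = fastdeploy::utils::make_unique<Foo>(1, 2);
  // Array-of-unknown-bound overload: value-initializes n elements.
  auto buf = fastdeploy::utils::make_unique<float[]>(16);
  buf[0] = static_cast<float>(foo->sum);
  // make_unique<float[16]>(...) would be rejected at compile time (known bound).
  return 0;
}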

View File

@@ -1,49 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
FDLogger::FDLogger(bool verbose, const std::string& prefix) {
verbose_ = verbose;
line_ = "";
prefix_ = prefix;
}
FDLogger& FDLogger::operator<<(std::ostream& (*os)(std::ostream&)) {
if (!verbose_) {
return *this;
}
std::cout << prefix_ << " " << line_ << std::endl;
line_ = "";
return *this;
}
bool ReadBinaryFromFile(const std::string& file, std::string* contents) {
std::ifstream fin(file, std::ios::in | std::ios::binary);
if (!fin.is_open()) {
FDERROR << "Failed to open file: " << file << " to read." << std::endl;
return false;
}
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
return true;
}
} // namespace fastdeploy

View File

@@ -1,150 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdlib.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#if defined(_WIN32)
#ifdef FASTDEPLOY_LIB
#define FASTDEPLOY_DECL __declspec(dllexport)
#else
#define FASTDEPLOY_DECL __declspec(dllimport)
#endif // FASTDEPLOY_LIB
#else
#define FASTDEPLOY_DECL __attribute__((visibility("default")))
#endif // _WIN32
namespace fastdeploy {
class FASTDEPLOY_DECL FDLogger {
public:
FDLogger() {
line_ = "";
prefix_ = "[FastDeploy]";
verbose_ = true;
}
explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]");
template <typename T>
FDLogger& operator<<(const T& val) {
if (!verbose_) {
return *this;
}
std::stringstream ss;
ss << val;
line_ += ss.str();
return *this;
}
FDLogger& operator<<(std::ostream& (*os)(std::ostream&));
~FDLogger() {
if (!verbose_ && line_ != "") {
std::cout << line_ << std::endl;
}
}
private:
std::string line_;
std::string prefix_;
bool verbose_ = true;
};
FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
std::string* contents);
#ifndef __REL_FILE__
#define __REL_FILE__ __FILE__
#endif
#define FDERROR \
FDLogger(true, "[ERROR]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#define FDWARNING \
FDLogger(true, "[WARNING]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#define FDINFO \
FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#define FDASSERT(condition, message) \
if (!(condition)) { \
FDERROR << message << std::endl; \
std::abort(); \
}
///////// Basic Marco ///////////
#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \
case enum_type: { \
using HINT = type; \
__VA_ARGS__(); \
break; \
}
#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \
FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__)
#define FD_VISIT_ALL_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::BOOL, bool, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
__VA_ARGS__) \
default: \
FDASSERT(false, "Invalid enum data type.") \
} \
}()
#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
__VA_ARGS__) \
default: \
FDASSERT(false, "Invalid enum data type.") \
} \
}()
#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
__VA_ARGS__) \
default: \
FDASSERT(false, "Invalid enum data type.") \
} \
}()
} // namespace fastdeploy
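
A usage sketch of the FD_VISIT_ALL_TYPES visitor above: it stamps out one typed loop per supported dtype and exposes the concrete element type as data_t. The include of fd_tensor.h is an assumption about where FDDataType is declared.

#include "fastdeploy/core/fd_tensor.h"   // assumed to declare FDDataType
#include "fastdeploy/utils/utils.h"

// Sums `numel` elements of a raw buffer whose runtime dtype is `dtype`.
// Unsupported dtypes (e.g. UINT8) fall into the default branch and trigger FDASSERT.
double SumBuffer(const void* data, int numel, fastdeploy::FDDataType dtype) {
  double total = 0.0;
  FD_VISIT_ALL_TYPES(dtype, "SumBuffer", ([&] {
    const data_t* ptr = reinterpret_cast<const data_t*>(data);
    for (int i = 0; i < numel; ++i) {
      total += static_cast<double>(ptr[i]);
    }
  }));
  return total;
}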

View File

@@ -1,41 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/config.h"
#ifdef ENABLE_VISION
#include "fastdeploy/vision/detection/contrib/nanodet_plus.h"
#include "fastdeploy/vision/detection/contrib/scaledyolov4.h"
#include "fastdeploy/vision/detection/contrib/yolor.h"
#include "fastdeploy/vision/detection/contrib/yolov5.h"
#include "fastdeploy/vision/detection/contrib/yolov5lite.h"
#include "fastdeploy/vision/detection/contrib/yolov6.h"
#include "fastdeploy/vision/detection/contrib/yolov7.h"
#include "fastdeploy/vision/detection/contrib/yolox.h"
#include "fastdeploy/vision/facedet/contrib/retinaface.h"
#include "fastdeploy/vision/facedet/contrib/scrfd.h"
#include "fastdeploy/vision/facedet/contrib/ultraface.h"
#include "fastdeploy/vision/facedet/contrib/yolov5face.h"
#include "fastdeploy/vision/faceid/contrib/arcface.h"
#include "fastdeploy/vision/faceid/contrib/cosface.h"
#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
#include "fastdeploy/vision/faceid/contrib/partial_fc.h"
#include "fastdeploy/vision/faceid/contrib/vpl.h"
#include "fastdeploy/vision/matting/contrib/modnet.h"
#include "fastdeploy/vision/ppcls/model.h"
#include "fastdeploy/vision/detection/ppdet/model.h"
#include "fastdeploy/vision/ppseg/model.h"
#endif
#include "fastdeploy/vision/visualize/visualize.h"

View File

@@ -1,3 +0,0 @@
# How to Add a New Model
This document uses [yolov5](https://github.com/ultralytics/yolov5) as an example to show how to add support for a new model.

View File

@@ -1,61 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/base.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace vision {
ProcLib Processor::default_lib = ProcLib::DEFAULT;
bool Processor::CpuRun(Mat* mat) {
FDERROR << "Unimplemented CpuRun." << std::endl;
return false;
}
#ifdef ENABLE_OPENCV_CUDA
bool Processor::GpuRun(Mat* mat) {
FDERROR << "Unimplemented GpuRun." << std::endl;
return false;
}
#endif
bool Processor::operator()(Mat* mat, ProcLib lib) {
// if default_lib is set
// then use default_lib
ProcLib target = lib;
if (default_lib != ProcLib::DEFAULT) {
target = default_lib;
}
if (target == ProcLib::OPENCV_CUDA) {
#ifdef ENABLE_OPENCV_CUDA
bool ret = GpuRun(mat);
mat->device = Device::GPU;
return ret;
#else
FDERROR
<< "OpenCV is not compiled with CUDA, cannot process image with CUDA."
<< std::endl;
return false;
#endif
}
bool ret = CpuRun(mat);
mat->device = Device::CPU;
return ret;
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,48 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
#include "fastdeploy/vision/common/processors/mat.h"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
namespace fastdeploy {
namespace vision {
enum ProcLib { DEFAULT, OPENCV_CPU, OPENCV_CUDA };
class Processor {
public:
// default_lib has the highest priority:
// if this flag is set, every function in `Processor` is forced to
// use default_lib.
// DEFAULT means this flag is not set.
static ProcLib default_lib;
// virtual bool ShapeInfer(const std::vector<int>& in_shape,
// std::vector<int>* out_shape) = 0;
virtual std::string Name() = 0;
virtual bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
virtual bool GpuRun(Mat* mat);
#endif
virtual bool operator()(Mat* mat,
ProcLib lib = ProcLib::OPENCV_CPU);
};
} // namespace vision
} // namespace fastdeploy
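
A hypothetical processor, shown only to illustrate how the Processor interface is meant to be extended; HorizontalFlip is not part of the original sources and implements the CPU path only.

#include "fastdeploy/vision/common/processors/base.h"

namespace fastdeploy {
namespace vision {

class HorizontalFlip : public Processor {
 public:
  std::string Name() override { return "HorizontalFlip"; }

  bool CpuRun(Mat* mat) override {
    cv::Mat* im = mat->GetCpuMat();
    cv::flip(*im, *im, 1);   // flip around the vertical axis, in place
    return true;
  }

  static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU) {
    auto flip = HorizontalFlip();
    return flip(mat, lib);   // dispatches through Processor::operator()
  }
};

}  // namespace vision
}  // namespace fastdeploy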

View File

@@ -1,64 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/cast.h"
namespace fastdeploy {
namespace vision {
bool Cast::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
int c = im->channels();
if (dtype_ == "float") {
if (im->type() != CV_32FC(c)) {
im->convertTo(*im, CV_32FC(c));
}
} else if (dtype_ == "double") {
if (im->type() != CV_64FC(c)) {
im->convertTo(*im, CV_64FC(c));
}
} else {
FDWARNING << "Cast not support for " << dtype_
<< " now! will skip this operation." << std::endl;
}
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Cast::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
int c = im->channels();
if (dtype_ == "float") {
if (im->type() != CV_32FC(c)) {
im->convertTo(*im, CV_32FC(c));
}
} else if (dtype_ == "double") {
if (im->type() != CV_64FC(c)) {
im->convertTo(*im, CV_64FC(c));
}
} else {
FDWARNING << "Cast not support for " << dtype_
<< " now! will skip this operation." << std::endl;
}
return true;
}
#endif
bool Cast::Run(Mat* mat, const std::string& dtype, ProcLib lib) {
auto c = Cast(dtype);
return c(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,37 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Cast : public Processor {
public:
explicit Cast(const std::string& dtype = "float") : dtype_(dtype) {}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Cast"; }
static bool Run(Mat* mat, const std::string& dtype,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
std::string dtype_;
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,63 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/center_crop.h"
namespace fastdeploy {
namespace vision {
bool CenterCrop::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
mat->SetWidth(width_);
mat->SetHeight(height_);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool CenterCrop::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
mat->SetWidth(width_);
mat->SetHeight(height_);
return true;
}
#endif
bool CenterCrop::Run(Mat* mat, const int& width, const int& height,
ProcLib lib) {
auto c = CenterCrop(width, height);
return c(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,40 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class CenterCrop : public Processor {
public:
CenterCrop(int width, int height) : height_(height), width_(width) {}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "CenterCrop"; }
static bool Run(Mat* mat, const int& width, const int& height,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
int height_;
int width_;
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,58 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/color_space_convert.h"
namespace fastdeploy {
namespace vision {
bool BGR2RGB::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool BGR2RGB::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::cuda::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
return true;
}
#endif
bool RGB2BGR::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool RGB2BGR::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::cuda::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
return true;
}
#endif
bool BGR2RGB::Run(Mat* mat, ProcLib lib) {
auto b = BGR2RGB();
return b(mat, lib);
}
bool RGB2BGR::Run(Mat* mat, ProcLib lib) {
auto r = RGB2BGR();
return r(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,44 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class BGR2RGB : public Processor {
public:
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
virtual std::string Name() { return "BGR2RGB"; }
static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
};
class RGB2BGR : public Processor {
public:
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "RGB2BGR"; }
static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,62 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/convert.h"
namespace fastdeploy {
namespace vision {
Convert::Convert(const std::vector<float>& alpha,
const std::vector<float>& beta) {
FDASSERT(alpha.size() == beta.size(),
"Convert: requires the size of alpha equal to the size of beta.");
FDASSERT(alpha.size() != 0,
"Convert: requires the size of alpha and beta > 0.");
alpha_.assign(alpha.begin(), alpha.end());
beta_.assign(beta.begin(), beta.end());
}
bool Convert::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
std::vector<cv::Mat> split_im;
cv::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::merge(split_im, *im);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Convert::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
std::vector<cv::cuda::GpuMat> split_im;
cv::cuda::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::cuda::merge(split_im, *im);
return true;
}
#endif
bool Convert::Run(Mat* mat, const std::vector<float>& alpha,
const std::vector<float>& beta, ProcLib lib) {
auto c = Convert(alpha, beta);
return c(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,42 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Convert : public Processor {
public:
Convert(const std::vector<float>& alpha, const std::vector<float>& beta);
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Convert"; }
// Compute `result = mat * alpha + beta` directly by channel.
// The default behavior is the same as OpenCV's convertTo method.
static bool Run(Mat* mat, const std::vector<float>& alpha,
const std::vector<float>& beta,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
std::vector<float> alpha_;
std::vector<float> beta_;
};
} // namespace vision
} // namespace fastdeploy
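
A usage sketch of Convert: per channel it computes result = mat * alpha + beta, so alpha = 1/255 and beta = 0 maps an 8-bit BGR image into [0, 1] floats. The image path is a placeholder.

#include <vector>
#include "fastdeploy/vision/common/processors/convert.h"
#include "opencv2/imgcodecs.hpp"

void ConvertSketch() {
  cv::Mat frame = cv::imread("test.jpg");                  // placeholder 3-channel image
  fastdeploy::vision::Mat mat(frame);
  std::vector<float> alpha = {1.f / 255.f, 1.f / 255.f, 1.f / 255.f};
  std::vector<float> beta = {0.f, 0.f, 0.f};
  fastdeploy::vision::Convert::Run(&mat, alpha, beta);     // CPU path by default
}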

View File

@@ -1,75 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/hwc2chw.h"
namespace fastdeploy {
namespace vision {
bool HWC2CHW::CpuRun(Mat* mat) {
if (mat->layout != Layout::HWC) {
FDERROR << "HWC2CHW: The input data is not Layout::HWC format!"
<< std::endl;
return false;
}
cv::Mat* im = mat->GetCpuMat();
cv::Mat im_clone = im->clone();
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
// float* data = reinterpret_cast<float*>(im->data);
for (int i = 0; i < rc; ++i) {
// cv::extractChannel(im_clone, cv::Mat(rh, rw, im->type() % 8, data + i
// * rh * rw),
// i);
cv::extractChannel(
im_clone,
cv::Mat(rh, rw, im->type() % 8,
im->ptr() + i * rh * rw * FDDataTypeSize(mat->Type())),
i);
}
mat->layout = Layout::CHW;
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool HWC2CHW::GpuRun(Mat* mat) {
if (mat->layout != Layout::HWC) {
FDERROR << "HWC2CHW: The input data is not Layout::HWC format!"
<< std::endl;
return false;
}
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::cuda::GpuMat im_clone = im->clone();
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
int num_pixels = rh * rw;
std::vector<cv::cuda::GpuMat> channels{
cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[0])),
cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels])),
cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels * 2]))};
cv::cuda::split(im_clone, channels);
mat->layout = Layout::CHW;
return true;
}
#endif
bool HWC2CHW::Run(Mat* mat, ProcLib lib) {
auto h = HWC2CHW();
return h(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,33 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class HWC2CHW : public Processor {
public:
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "HWC2CHW"; }
static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,117 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/mat.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace vision {
#ifdef ENABLE_OPENCV_CUDA
cv::cuda::GpuMat* Mat::GetGpuMat() {
if (device == Device::CPU) {
gpu_mat.upload(cpu_mat);
}
return &gpu_mat;
}
#endif
cv::Mat* Mat::GetCpuMat() {
#ifdef ENABLE_OPENCV_CUDA
if (device == Device::GPU) {
gpu_mat.download(cpu_mat);
}
#endif
return &cpu_mat;
}
void Mat::ShareWithTensor(FDTensor* tensor) {
if (device == Device::GPU) {
#ifdef ENABLE_OPENCV_CUDA
tensor->SetExternalData({Channels(), Height(), Width()}, Type(),
GetGpuMat()->ptr());
tensor->device = Device::GPU;
#endif
} else {
tensor->SetExternalData({Channels(), Height(), Width()}, Type(),
GetCpuMat()->ptr());
tensor->device = Device::CPU;
}
if (layout == Layout::HWC) {
tensor->shape = {Height(), Width(), Channels()};
}
}
bool Mat::CopyToTensor(FDTensor* tensor) {
cv::Mat* im = GetCpuMat();
int total_bytes = im->total() * im->elemSize();
if (total_bytes != tensor->Nbytes()) {
FDERROR << "While copy Mat to Tensor, requires the memory size be same, "
"but now size of Tensor = "
<< tensor->Nbytes() << ", size of Mat = " << total_bytes << "."
<< std::endl;
return false;
}
memcpy(tensor->MutableData(), im->ptr(), im->total() * im->elemSize());
return true;
}
void Mat::PrintInfo(const std::string& flag) {
cv::Mat* im = GetCpuMat();
cv::Scalar mean = cv::mean(*im);
std::cout << flag << ": "
<< "Channel=" << Channels() << ", height=" << Height()
<< ", width=" << Width() << ", mean=";
for (int i = 0; i < Channels(); ++i) {
std::cout << mean[i] << " ";
}
std::cout << std::endl;
}
FDDataType Mat::Type() {
int type = -1;
if (device == Device::GPU) {
#ifdef ENABLE_OPENCV_CUDA
type = gpu_mat.type();
#endif
} else {
type = cpu_mat.type();
}
if (type < 0) {
FDASSERT(false,
"While calling Mat::Type(), get negative value, which is not "
"expected!.");
}
type = type % 8;
if (type == 0) {
return FDDataType::UINT8;
} else if (type == 1) {
return FDDataType::INT8;
} else if (type == 2) {
FDASSERT(false, "While calling Mat::Type(), get UINT16 type which is not "
"supported now.");
} else if (type == 3) {
return FDDataType::INT16;
} else if (type == 4) {
return FDDataType::INT32;
} else if (type == 5) {
return FDDataType::FP32;
} else if (type == 6) {
return FDDataType::FP64;
} else {
FDASSERT(false, "While calling Mat::Type(), get type = " +
std::to_string(type) + ", which is not expected!.");
}
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,80 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/fd_tensor.h"
#include "opencv2/core/core.hpp"
#ifdef ENABLE_OPENCV_CUDA
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#endif
namespace fastdeploy {
namespace vision {
enum Layout { HWC, CHW };
struct FASTDEPLOY_DECL Mat {
explicit Mat(cv::Mat& mat) {
cpu_mat = mat;
device = Device::CPU;
layout = Layout::HWC;
height = cpu_mat.rows;
width = cpu_mat.cols;
channels = cpu_mat.channels();
}
private:
int channels;
int height;
int width;
cv::Mat cpu_mat;
#ifdef ENABLE_OPENCV_CUDA
cv::cuda::GpuMat gpu_mat;
#endif
public:
#ifdef ENABLE_OPENCV_CUDA
cv::cuda::GpuMat* GetGpuMat();
#endif
cv::Mat* GetCpuMat();
FDDataType Type();
int Channels() const { return channels; }
int Width() const { return width; }
int Height() const { return height; }
void SetChannels(int s) { channels = s; }
void SetWidth(int w) { width = w; }
void SetHeight(int h) { height = h; }
// Transfer the vision::Mat to FDTensor
void ShareWithTensor(FDTensor* tensor);
// Only support copy to cpu tensor now
bool CopyToTensor(FDTensor* tensor);
// debug functions
// TODO(jiangjiajun) Developing a proper processing pipeline in C++ is not
// easy; more debug functions will be added here to help debug the processed
// image.
// This function prints the shape and per-channel mean of the Mat.
void PrintInfo(const std::string& flag);
Layout layout = Layout::HWC;
Device device = Device::CPU;
};
} // namespace vision
} // namespace fastdeploy
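
A short sketch of the intended Mat-to-FDTensor flow (the image path is a placeholder): ShareWithTensor exposes the underlying cv::Mat buffer to the runtime without a copy, while CopyToTensor would memcpy the pixels into an already-allocated tensor.

#include "fastdeploy/vision/common/processors/mat.h"
#include "opencv2/imgcodecs.hpp"

void MatToTensorSketch() {
  cv::Mat frame = cv::imread("test.jpg");   // placeholder path; BGR, HWC, uint8
  fastdeploy::vision::Mat mat(frame);
  mat.PrintInfo("original");                // prints channels / height / width / per-channel mean

  fastdeploy::FDTensor tensor;
  mat.ShareWithTensor(&tensor);             // zero-copy: the tensor points at the cv::Mat data
}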

View File

@@ -1,88 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/normalize.h"
namespace fastdeploy {
namespace vision {
Normalize::Normalize(const std::vector<float>& mean,
const std::vector<float>& std, bool is_scale,
const std::vector<float>& min,
const std::vector<float>& max) {
FDASSERT(mean.size() == std.size(),
"Normalize: requires the size of mean equal to the size of std.");
std::vector<double> mean_(mean.begin(), mean.end());
std::vector<double> std_(std.begin(), std.end());
std::vector<double> min_(mean.size(), 0.0);
std::vector<double> max_(mean.size(), 255.0);
if (min.size() != 0) {
FDASSERT(
min.size() == mean.size(),
"Normalize: while min is defined, requires the size of min equal to "
"the size of mean.");
min_.assign(min.begin(), min.end());
}
if (max.size() != 0) {
FDASSERT(
max.size() == mean.size(),
"Normalize: while max is defined, requires the size of max equal to "
"the size of mean.");
max_.assign(max.begin(), max.end());
}
for (auto c = 0; c < mean_.size(); ++c) {
double alpha = 1.0;
if (is_scale) {
alpha /= (max_[c] - min_[c]);
}
double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c];
alpha /= std_[c];
alpha_.push_back(alpha);
beta_.push_back(beta);
}
}
bool Normalize::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
std::vector<cv::Mat> split_im;
cv::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::merge(split_im, *im);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Normalize::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
std::vector<cv::cuda::GpuMat> split_im;
cv::cuda::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::cuda::merge(split_im, *im);
return true;
}
#endif
bool Normalize::Run(Mat* mat, const std::vector<float>& mean,
const std::vector<float>& std, bool is_scale,
const std::vector<float>& min,
const std::vector<float>& max, ProcLib lib) {
auto n = Normalize(mean, std, is_scale, min, max);
return n(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,53 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Normalize : public Processor {
public:
Normalize(const std::vector<float>& mean, const std::vector<float>& std,
bool is_scale = true,
const std::vector<float>& min = std::vector<float>(),
const std::vector<float>& max = std::vector<float>());
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Normalize"; }
// When normalization is used repeatedly, it is recommended not to call this
// static function, since it needs to compute
// result = ((mat / 255) - mean) / std on every call.
// If we use the following method instead
// ```
// auto norm = Normalize(...)
// norm(mat)
// ```
// the precomputation is done once in the constructor, and `norm(mat)` only
// needs to compute result = mat * alpha + beta, which saves a lot of time.
static bool Run(Mat* mat, const std::vector<float>& mean,
const std::vector<float>& std, bool is_scale = true,
const std::vector<float>& min = std::vector<float>(),
const std::vector<float>& max = std::vector<float>(),
ProcLib lib = ProcLib::OPENCV_CPU);
private:
std::vector<float> alpha_;
std::vector<float> beta_;
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,100 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/pad.h"
namespace fastdeploy {
namespace vision {
bool Pad::CpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "Pad: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "Pad: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR << "Pad: Requires the number of input channels to equal the size "
               "of the padding values, but now channels = "
            << mat->Channels()
            << ", the size of padding values = " << value_.size() << "."
            << std::endl;
return false;
}
cv::Mat* im = mat->GetCpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
cv::copyMakeBorder(*im, *im, top_, bottom_, left_, right_,
cv::BORDER_CONSTANT, value);
mat->SetHeight(im->rows);
mat->SetWidth(im->cols);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Pad::GpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "Pad: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "Pad: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR << "Pad: Requires the number of input channels to equal the size "
               "of the padding values, but now channels = "
            << mat->Channels()
            << ", the size of padding values = " << value_.size() << "."
            << std::endl;
return false;
}
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
cv::cuda::copyMakeBorder(*im, *im, top_, bottom_, left_, right_,
cv::BORDER_CONSTANT, value);
mat->SetHeight(im->rows);
mat->SetWidth(im->cols);
return true;
}
#endif
bool Pad::Run(Mat* mat, const int& top, const int& bottom, const int& left,
const int& right, const std::vector<float>& value,
ProcLib lib) {
auto p = Pad(top, bottom, left, right, value);
return p(mat, lib);
}
} // namespace vision
} // namespace fastdeploy
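
For reference, a hypothetical usage sketch of Pad::Run above. The image path and padding value are illustrative; note that the size of `value` must match the image's channel count, otherwise CpuRun/GpuRun rejects the input:
```
#include <opencv2/opencv.hpp>
#include <vector>

#include "fastdeploy/vision/common/processors/pad.h"

int main() {
  cv::Mat frame = cv::imread("test.jpg");  // assumed 3-channel BGR input
  fastdeploy::vision::Mat mat(frame);      // assumes Mat can wrap a cv::Mat
  // One padding value per channel; here a constant gray border of 114.
  std::vector<float> value = {114.0f, 114.0f, 114.0f};
  // Add a 10-pixel constant border on every side (top, bottom, left, right).
  fastdeploy::vision::Pad::Run(&mat, 10, 10, 10, 10, value);
  return 0;
}
```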


@@ -1,50 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Pad : public Processor {
public:
Pad(int top, int bottom, int left, int right,
const std::vector<float>& value) {
top_ = top;
bottom_ = bottom;
left_ = left;
right_ = right;
value_ = value;
}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Pad"; }
static bool Run(Mat* mat, const int& top, const int& bottom, const int& left,
const int& right, const std::vector<float>& value,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
int top_;
int bottom_;
int left_;
int right_;
std::vector<float> value_;
};
} // namespace vision
} // namespace fastdeploy


@@ -1,141 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/pad_to_size.h"
namespace fastdeploy {
namespace vision {
bool PadToSize::CpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "PadToSize: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "PadToSize: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR
        << "PadToSize: Requires the number of input channels to equal the "
           "size of the padding values, but now channels = "
        << mat->Channels() << ", the size of padding values = " << value_.size()
        << "." << std::endl;
    return false;
  }
  int origin_w = mat->Width();
  int origin_h = mat->Height();
  if (origin_w > width_) {
    FDERROR << "PadToSize: the input width: " << origin_w
            << " is greater than the target width: " << width_ << "."
            << std::endl;
    return false;
  }
  if (origin_h > height_) {
    FDERROR << "PadToSize: the input height: " << origin_h
            << " is greater than the target height: " << height_ << "."
            << std::endl;
    return false;
  }
if (origin_w == width_ && origin_h == height_) {
return true;
}
cv::Mat* im = mat->GetCpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
// top, bottom, left, right
cv::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0, width_ - origin_w,
cv::BORDER_CONSTANT, value);
mat->SetHeight(height_);
mat->SetWidth(width_);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool PadToSize::GpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "PadToSize: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "PadToSize: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR
        << "PadToSize: Requires the number of input channels to equal the "
           "size of the padding values, but now channels = "
        << mat->Channels() << ", the size of padding values = " << value_.size()
        << "." << std::endl;
    return false;
  }
  int origin_w = mat->Width();
  int origin_h = mat->Height();
  if (origin_w > width_) {
    FDERROR << "PadToSize: the input width: " << origin_w
            << " is greater than the target width: " << width_ << "."
            << std::endl;
    return false;
  }
  if (origin_h > height_) {
    FDERROR << "PadToSize: the input height: " << origin_h
            << " is greater than the target height: " << height_ << "."
            << std::endl;
    return false;
  }
if (origin_w == width_ && origin_h == height_) {
return true;
}
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
// top, bottom, left, right
cv::cuda::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0,
width_ - origin_w, cv::BORDER_CONSTANT, value);
mat->SetHeight(height_);
mat->SetWidth(width_);
return true;
}
#endif
bool PadToSize::Run(Mat* mat, int width, int height,
const std::vector<float>& value, ProcLib lib) {
auto p = PadToSize(width, height, value);
return p(mat, lib);
}
} // namespace vision
} // namespace fastdeploy
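
And a hypothetical sketch of PadToSize::Run above; the target size and image path are assumptions. Because only the left-top padding mode is supported, the original pixels stay anchored at the top-left corner and the border is added to the bottom and right:
```
#include <opencv2/opencv.hpp>
#include <vector>

#include "fastdeploy/vision/common/processors/pad_to_size.h"

int main() {
  cv::Mat frame = cv::imread("test.jpg");  // assumed smaller than 640x640
  fastdeploy::vision::Mat mat(frame);      // assumes Mat can wrap a cv::Mat
  std::vector<float> value = {0.0f, 0.0f, 0.0f};  // one value per channel
  // Pads the bottom and right borders so the output becomes 640x640,
  // leaving the original content at the top-left corner.
  fastdeploy::vision::PadToSize::Run(&mat, /*width=*/640, /*height=*/640,
                                     value);
  return 0;
}
```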


@@ -1,46 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class PadToSize : public Processor {
public:
  // Only supports the left-top padding mode: the original content stays at
  // the top-left corner and padding is added to the bottom and right.
PadToSize(int width, int height, const std::vector<float>& value) {
width_ = width;
height_ = height;
value_ = value;
}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "PadToSize"; }
static bool Run(Mat* mat, int width, int height,
const std::vector<float>& value,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
int width_;
int height_;
std::vector<float> value_;
};
} // namespace vision
} // namespace fastdeploy

Some files were not shown because too many files have changed in this diff.