Remove tensorrt/common codes (#171)

Jason
2022-08-29 19:21:18 +08:00
committed by GitHub
parent 00e3a4ad99
commit 737b62a2ba
33 changed files with 232 additions and 14432 deletions


@@ -1,342 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BATCH_STREAM_H
#define BATCH_STREAM_H
#include "NvInfer.h"
#include "common.h"
#include <algorithm>
#include <stdio.h>
#include <vector>
class IBatchStream {
public:
virtual void reset(int firstBatch) = 0;
virtual bool next() = 0;
virtual void skip(int skipCount) = 0;
virtual float* getBatch() = 0;
virtual float* getLabels() = 0;
virtual int getBatchesRead() const = 0;
virtual int getBatchSize() const = 0;
virtual nvinfer1::Dims getDims() const = 0;
};
class MNISTBatchStream : public IBatchStream {
public:
MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
const std::string& labelsFile,
const std::vector<std::string>& directories)
: mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
//!< We already know the dimensions of MNIST images.
{
readDataFile(locateFile(dataFile, directories));
readLabelsFile(locateFile(labelsFile, directories));
}
void reset(int firstBatch) override { mBatchCount = firstBatch; }
bool next() override {
if (mBatchCount >= mMaxBatches) {
return false;
}
++mBatchCount;
return true;
}
void skip(int skipCount) override { mBatchCount += skipCount; }
float* getBatch() override {
return mData.data() +
(mBatchCount * mBatchSize * samplesCommon::volume(mDims));
}
float* getLabels() override {
return mLabels.data() + (mBatchCount * mBatchSize);
}
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override {
return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
}
private:
void readDataFile(const std::string& dataFilePath) {
std::ifstream file{dataFilePath.c_str(), std::ios::binary};
int magicNumber, numImages, imageH, imageW;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2051 &&
"Magic Number does not match the expected value for an MNIST image "
"set");
// Read number of images and dimensions
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
numImages = samplesCommon::swapEndianness(numImages);
imageH = samplesCommon::swapEndianness(imageH);
imageW = samplesCommon::swapEndianness(imageW);
// The MNIST data is made up of unsigned bytes, so we need to cast to float
// and normalize.
int numElements = numImages * imageH * imageW;
std::vector<uint8_t> rawData(numElements);
file.read(reinterpret_cast<char*>(rawData.data()),
numElements * sizeof(uint8_t));
mData.resize(numElements);
std::transform(rawData.begin(), rawData.end(), mData.begin(),
[](uint8_t val) { return static_cast<float>(val) / 255.f; });
}
void readLabelsFile(const std::string& labelsFilePath) {
std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
int magicNumber, numImages;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2049 &&
"Magic Number does not match the expected value for an MNIST labels "
"file");
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
numImages = samplesCommon::swapEndianness(numImages);
std::vector<uint8_t> rawLabels(numImages);
file.read(reinterpret_cast<char*>(rawLabels.data()),
numImages * sizeof(uint8_t));
mLabels.resize(numImages);
std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
[](uint8_t val) { return static_cast<float>(val); });
}
int mBatchSize{0};
int mBatchCount{
0}; //!< The batch that will be read on the next invocation of next()
int mMaxBatches{0};
Dims mDims{};
std::vector<float> mData{};
std::vector<float> mLabels{};
};
class BatchStream : public IBatchStream {
public:
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::string suffix, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
mSuffix(suffix), mDataDir(directories) {
FILE* file = fopen(
locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
"rb");
ASSERT(file != nullptr);
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
mDims.nbDims = 4; // The number of dimensions.
mDims.d[0] = d[0]; // Batch Size
mDims.d[1] = d[1]; // Channels
mDims.d[2] = d[2]; // Height
mDims.d[3] = d[3]; // Width
ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
mDims.d[3] > 0);
fclose(file);
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::vector<std::string> directories)
: BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}
BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
std::string listFile, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
mListFile(listFile), mDataDir(directories) {
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
// Resets data members
void reset(int firstBatch) override {
mBatchCount = 0;
mFileCount = 0;
mFileBatchPos = mDims.d[0];
skip(firstBatch);
}
// Advance to next batch and return true, or return false if there is no batch
// left.
bool next() override {
if (mBatchCount == mMaxBatches) {
return false;
}
for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
batchPos += csize, mFileBatchPos += csize) {
ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
if (mFileBatchPos == mDims.d[0] && !update()) {
return false;
}
// copy the smaller of: elements left to fulfill the request, or elements
// left in the file buffer.
csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
csize * mImageSize, getBatch() + batchPos * mImageSize);
std::copy_n(getFileLabels() + mFileBatchPos, csize,
getLabels() + batchPos);
}
mBatchCount++;
return true;
}
// Skips the batches
void skip(int skipCount) override {
if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
mFileBatchPos == mDims.d[0]) {
mFileCount += skipCount * mBatchSize / mDims.d[0];
return;
}
int x = mBatchCount;
for (int i = 0; i < skipCount; i++) {
next();
}
mBatchCount = x;
}
float* getBatch() override { return mBatch.data(); }
float* getLabels() override { return mLabels.data(); }
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override { return mDims; }
private:
float* getFileBatch() { return mFileBatch.data(); }
float* getFileLabels() { return mFileLabels.data(); }
bool update() {
if (mListFile.empty()) {
std::string inputFileName = locateFile(
mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
FILE* file = fopen(inputFileName.c_str(), "rb");
if (!file) {
return false;
}
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
mDims.d[3] == d[3]);
size_t readInputCount =
fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
size_t readLabelCount =
fread(getFileLabels(), sizeof(float), mDims.d[0], file);
ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));
fclose(file);
} else {
std::vector<std::string> fNames;
std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
if (!file) {
return false;
}
sample::gLogInfo << "Batch #" << mFileCount << std::endl;
file.seekg(((mBatchCount * mBatchSize)) * 7);
for (int i = 1; i <= mBatchSize; i++) {
std::string sName;
std::getline(file, sName);
sName = sName + ".ppm";
sample::gLogInfo << "Calibrating with file " << sName << std::endl;
fNames.emplace_back(sName);
}
mFileCount++;
const int imageC = 3;
const int imageH = 300;
const int imageW = 300;
std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
fNames.size());
for (uint32_t i = 0; i < fNames.size(); ++i) {
readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
}
std::vector<float> data(samplesCommon::volume(mDims));
const float scale = 2.0 / 255.0;
const float bias = 1.0;
long int volChl = mDims.d[2] * mDims.d[3];
// Normalize input data
for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
i < mBatchSize; ++i) {
for (int c = 0; c < mDims.d[1]; ++c) {
for (int j = 0; j < volChl; ++j) {
data[i * volImg + c * volChl + j] =
scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
}
}
}
std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
}
mFileBatchPos = 0;
return true;
}
int mBatchSize{0};
int mMaxBatches{0};
int mBatchCount{0};
int mFileCount{0};
int mFileBatchPos{0};
int mImageSize{0};
std::vector<float> mBatch; //!< Data for the batch
std::vector<float> mLabels; //!< Labels for the batch
std::vector<float> mFileBatch; //!< Data for the current batch file
std::vector<float> mFileLabels; //!< Labels for the current batch file
std::string mPrefix; //!< Batch file name prefix
std::string mSuffix; //!< Batch file name suffix
nvinfer1::Dims mDims; //!< Input dimensions
std::string mListFile; //!< File name of the list of image names
std::vector<std::string>
mDataDir; //!< Directories where the files can be found
};
#endif
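A minimal usage sketch of the IBatchStream interface above; the batch size, batch count, file names, and data directory are hypothetical, following the MNIST sample layout:

// Sketch: iterate calibration batches from hypothetical MNIST files.
#include "BatchStream.h"
int main() {
  std::vector<std::string> dirs{"data/mnist/"};
  MNISTBatchStream stream(32 /*batchSize*/, 10 /*maxBatches*/,
                          "train-images-idx3-ubyte",
                          "train-labels-idx1-ubyte", dirs);
  stream.reset(0);
  while (stream.next()) {
    float* batch = stream.getBatch();   // batchSize * 1 * 28 * 28 floats
    float* labels = stream.getLabels(); // batchSize floats
    // Feed batch/labels to a calibrator or an accuracy check here.
  }
  return 0;
}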


@@ -1 +0,0 @@
exclude_files=.*


@@ -1,118 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H
#include "BatchStream.h"
#include "NvInfer.h"
//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
template <typename TBatchStream> class EntropyCalibratorImpl {
public:
EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
std::string networkName, const char* inputBlobName,
bool readCache = true)
: mStream{stream},
mCalibrationTableName("CalibrationTable" + networkName),
mInputBlobName(inputBlobName), mReadCache(readCache) {
nvinfer1::Dims dims = mStream.getDims();
mInputCount = samplesCommon::volume(dims);
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
mStream.reset(firstBatch);
}
virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }
int getBatchSize() const noexcept { return mStream.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept {
if (!mStream.next()) {
return false;
}
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
mInputCount * sizeof(float), cudaMemcpyHostToDevice));
ASSERT(!strcmp(names[0], mInputBlobName));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) noexcept {
mCalibrationCache.clear();
std::ifstream input(mCalibrationTableName, std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good()) {
std::copy(std::istream_iterator<char>(input),
std::istream_iterator<char>(),
std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? mCalibrationCache.data() : nullptr;
}
void writeCalibrationCache(const void* cache, size_t length) noexcept {
std::ofstream output(mCalibrationTableName, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
private:
TBatchStream mStream;
size_t mInputCount;
std::string mCalibrationTableName;
const char* mInputBlobName;
bool mReadCache{true};
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache;
};
//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
template <typename TBatchStream>
class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
const char* networkName, const char* inputBlobName,
bool readCache = true)
: mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}
int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept override {
return mImpl.getBatch(bindings, names, nbBindings);
}
const void* readCalibrationCache(size_t& length) noexcept override {
return mImpl.readCalibrationCache(length);
}
void writeCalibrationCache(const void* cache,
size_t length) noexcept override {
mImpl.writeCalibrationCache(cache, length);
}
private:
EntropyCalibratorImpl<TBatchStream> mImpl;
};
#endif // ENTROPY_CALIBRATOR_H
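A sketch of how the calibrator above is wired into an INT8 build. The helper function name, network name, input blob name, and stream parameters are hypothetical; the static locals are one way to keep the calibrator alive while the engine is built:

// Sketch: enable INT8 and attach the entropy calibrator.
void enableInt8Calibration(nvinfer1::IBuilderConfig* config) {
  static MNISTBatchStream stream(32, 10, "train-images-idx3-ubyte",
                                 "train-labels-idx1-ubyte", {"data/mnist/"});
  static Int8EntropyCalibrator2<MNISTBatchStream> calibrator(
      stream, 0 /*firstBatch*/, "mnist" /*networkName*/, "data" /*inputBlob*/);
  config->setFlag(nvinfer1::BuilderFlag::kINT8);
  config->setInt8Calibrator(&calibrator); // must outlive the engine build
}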


@@ -1,115 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ERROR_RECORDER_H
#define ERROR_RECORDER_H
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include <atomic>
#include <cstdint>
#include <exception>
#include <mutex>
#include <vector>
using nvinfer1::ErrorCode;
using nvinfer1::IErrorRecorder;
//!
//! A simple implementation of the IErrorRecorder interface for
//! use by samples. This interface also can be used as a reference
//! implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutexes and atomics in order to make sure that the code
//! works in a multi-threaded environment.
//!
class SampleErrorRecorder : public IErrorRecorder {
using errorPair = std::pair<ErrorCode, std::string>;
using errorStack = std::vector<errorPair>;
public:
SampleErrorRecorder() = default;
virtual ~SampleErrorRecorder() noexcept {}
int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
: (*this)[errorIdx].first;
};
IErrorRecorder::ErrorDesc
getErrorDesc(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
: (*this)[errorIdx].second.c_str();
}
// This class can never overflow since we have dynamic resize via std::vector
// usage.
bool hasOverflowed() const noexcept final { return false; }
// Empty the errorStack.
void clear() noexcept final {
try {
// grab a lock so that there is no addition while clearing.
std::lock_guard<std::mutex> guard(mStackLock);
mErrorStack.clear();
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
};
//! Simple helper function that returns true if the error stack is empty.
bool empty() const noexcept { return mErrorStack.empty(); }
bool reportError(ErrorCode val,
IErrorRecorder::ErrorDesc desc) noexcept final {
try {
std::lock_guard<std::mutex> guard(mStackLock);
sample::gLogError << "Error[" << static_cast<int32_t>(val)
<< "]: " << desc << std::endl;
mErrorStack.push_back(errorPair(val, desc));
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
// All errors are considered fatal.
return true;
}
// Atomically increment or decrement the ref counter.
IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }
private:
// Simple helper functions.
const errorPair& operator[](size_t index) const noexcept {
return mErrorStack[index];
}
bool invalidIndexCheck(int32_t index) const noexcept {
// By converting signed to unsigned, we only need a single check since
// negative numbers turn into large positive greater than the size.
size_t sIndex = index;
return sIndex >= mErrorStack.size();
}
// Mutex to hold when locking mErrorStack.
std::mutex mStackLock;
// Reference count of the class. Destruction of the class when mRefCount
// is not zero causes undefined behavior.
std::atomic<int32_t> mRefCount{0};
// The error stack that holds the errors recorded by TensorRT.
errorStack mErrorStack;
}; // class SampleErrorRecorder
#endif // ERROR_RECORDER_H
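A sketch of the intended registration, shown here on an IRuntime created with the sample logger (the wrapping function is illustrative); the same setErrorRecorder call also exists on the builder:

// Sketch: route TensorRT errors through the recorder above.
void runWithErrorRecorder() {
  SampleErrorRecorder recorder;
  auto runtime = std::unique_ptr<nvinfer1::IRuntime>(
      nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
  runtime->setErrorRecorder(&recorder);
  // ... deserialize an engine and run inference ...
  for (int32_t i = 0; i < recorder.getNbErrors(); ++i) {
    sample::gLogError << recorder.getErrorDesc(i) << std::endl;
  }
  recorder.clear();
}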


@@ -1 +0,0 @@
The code in this directory originates from https://github.com/NVIDIA/TensorRT


@@ -1,169 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_ARGS_PARSER_H
#define TENSORRT_ARGS_PARSER_H
#include <string>
#include <vector>
#ifdef _MSC_VER
#include ".\windows\getopt.h"
#else
#include <getopt.h>
#endif
#include <iostream>
namespace samplesCommon {
//!
//! \brief The SampleParams structure groups the basic parameters required by
//! all sample networks.
//!
struct SampleParams {
int32_t batchSize{1}; //!< Number of inputs in a batch
int32_t dlaCore{-1}; //!< Specify the DLA core to run network on.
bool int8{false}; //!< Allow running the network in Int8 mode.
bool fp16{false}; //!< Allow running the network in FP16 mode.
std::vector<std::string>
dataDirs; //!< Directory paths where sample data files are stored
std::vector<std::string> inputTensorNames;
std::vector<std::string> outputTensorNames;
};
//!
//! \brief The CaffeSampleParams structure groups the additional parameters
//! required by
//! networks that use caffe
//!
struct CaffeSampleParams : public SampleParams {
std::string
prototxtFileName; //!< Filename of prototxt design file of a network
std::string
weightsFileName; //!< Filename of trained weights file of a network
std::string meanFileName; //!< Filename of mean file of a network
};
//!
//! \brief The OnnxSampleParams structure groups the additional parameters
//! required by
//! networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams {
std::string onnxFileName; //!< Filename of ONNX file of a network
};
//!
//! \brief The UffSampleParams structure groups the additional parameters
//! required by
//! networks that use Uff
//!
struct UffSampleParams : public SampleParams {
std::string uffFileName; //!< Filename of uff file of a network
};
//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args {
bool runInInt8{false};
bool runInFp16{false};
bool help{false};
int32_t useDLACore{-1};
int32_t batch{1};
std::vector<std::string> dataDirs;
std::string saveEngine;
std::string loadEngine;
bool useILoop{false};
};
//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return true if execution can continue; otherwise the program should exit
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
while (1) {
int32_t arg;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"datadir", required_argument, 0, 'd'},
{"int8", no_argument, 0, 'i'},
{"fp16", no_argument, 0, 'f'},
{"useILoop", no_argument, 0, 'l'},
{"saveEngine", required_argument, 0, 's'},
{"loadEngine", no_argument, 0, 'o'},
{"useDLACore", required_argument, 0, 'u'},
{"batch", required_argument, 0, 'b'},
{nullptr, 0, nullptr, 0}};
int32_t option_index = 0;
arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
if (arg == -1) {
break;
}
switch (arg) {
case 'h':
args.help = true;
return true;
case 'd':
if (optarg) {
args.dataDirs.push_back(optarg);
} else {
std::cerr << "ERROR: --datadir requires option argument" << std::endl;
return false;
}
break;
case 's':
if (optarg) {
args.saveEngine = optarg;
}
break;
case 'o':
if (optarg) {
args.loadEngine = optarg;
}
break;
case 'i':
args.runInInt8 = true;
break;
case 'f':
args.runInFp16 = true;
break;
case 'l':
args.useILoop = true;
break;
case 'u':
if (optarg) {
args.useDLACore = std::stoi(optarg);
}
break;
case 'b':
if (optarg) {
args.batch = std::stoi(optarg);
}
break;
default:
return false;
}
}
return true;
}
} // namespace samplesCommon
#endif // TENSORRT_ARGS_PARSER_H
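A sketch of the call pattern parseArgs expects in a sample's main(); the usage text is illustrative only:

// Sketch: populate Args from the command line.
int main(int argc, char** argv) {
  samplesCommon::Args args;
  if (!samplesCommon::parseArgs(args, argc, argv)) {
    std::cerr << "Invalid arguments" << std::endl;
    return 1;
  }
  if (args.help) {
    std::cout << "Usage: sample [-h] [-d DIR] [--int8] [--fp16] [--useDLACore=N]"
              << std::endl;
    return 0;
  }
  // args.dataDirs, args.runInInt8, args.batch, ... are now populated.
  return 0;
}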


@@ -1,426 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_BUFFERS_H
#define TENSORRT_BUFFERS_H
#include "NvInfer.h"
#include "common.h"
#include "half.h"
#include <cassert>
#include <cuda_runtime_api.h>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <vector>
namespace samplesCommon {
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class
//! handles the allocation,
//! deallocation, querying of buffers on both the device and the host.
//! It can handle data of arbitrary types because it stores byte
//! buffers.
//! The template parameters AllocFunc and FreeFunc are used for the
//! allocation and deallocation of the buffer.
//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
//! and returns bool. ptr is a pointer to where the allocated buffer
//! address should be stored.
//! size is the amount of memory in bytes to allocate.
//! The boolean indicates whether or not the memory allocation was
//! successful.
//! FreeFunc must be a functor that takes in (void* ptr) and returns
//! void.
//! ptr is the allocated buffer address. It must work with nullptr
//! input.
//!
template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
public:
//!
//! \brief Construct an empty buffer.
//!
GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
GenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size), mCapacity(size), mType(type) {
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc();
}
}
GenericBuffer(GenericBuffer&& buf)
: mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
mBuffer(buf.mBuffer) {
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
GenericBuffer& operator=(GenericBuffer&& buf) {
if (this != &buf) {
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data() { return mBuffer; }
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const { return mBuffer; }
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const { return mSize; }
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const {
return this->size() * samplesCommon::getElementSize(mType);
}
//!
//! \brief Resizes the buffer. This is a no-op if the new size is smaller than
//! or equal to the current capacity.
//!
void resize(size_t newSize) {
mSize = newSize;
if (mCapacity < newSize) {
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims) {
return this->resize(samplesCommon::volume(dims));
}
~GenericBuffer() { freeFn(mBuffer); }
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class DeviceAllocator {
public:
bool operator()(void** ptr, size_t size) const {
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class DeviceFree {
public:
void operator()(void* ptr) const { cudaFree(ptr); }
};
class HostAllocator {
public:
bool operator()(void** ptr, size_t size) const {
*ptr = malloc(size);
return *ptr != nullptr;
}
};
class HostFree {
public:
void operator()(void* ptr) const { free(ptr); }
};
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
//!
//! \brief The ManagedBuffer class groups together a pair of corresponding
//! device and host buffers.
//!
class ManagedBuffer {
public:
DeviceBuffer deviceBuffer;
HostBuffer hostBuffer;
};
//!
//! \brief The BufferManager class handles host and device buffer allocation
//! and deallocation.
//!
//! \details This RAII class handles host and device buffer allocation and
//! deallocation,
//! memcpy between host and device buffers to aid with inference,
//! and debugging dumps to validate inference. The BufferManager class
//! is meant to be
//! used to simplify buffer management and any interactions between
//! buffers and the engine.
//!
class BufferManager {
public:
static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
//!
//! \brief Create a BufferManager for handling buffer interactions with
//! engine.
//!
BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
const int batchSize = 0,
const nvinfer1::IExecutionContext* context = nullptr)
: mEngine(engine), mBatchSize(batchSize) {
// Full Dims implies no batch size.
assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
// Create host and device buffers
for (int i = 0; i < mEngine->getNbBindings(); i++) {
auto dims = context ? context->getBindingDimensions(i)
: mEngine->getBindingDimensions(i);
size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
nvinfer1::DataType type = mEngine->getBindingDataType(i);
int vecDim = mEngine->getBindingVectorizedDim(i);
if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
{
int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
vol *= scalarsPerVec;
}
vol *= samplesCommon::volume(dims);
std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
manBuf->deviceBuffer = DeviceBuffer(vol, type);
manBuf->hostBuffer = HostBuffer(vol, type);
mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
mManagedBuffers.emplace_back(std::move(manBuf));
}
}
//!
//! \brief Returns a vector of device buffers that you can use directly as
//! bindings for the execute and enqueue methods of IExecutionContext.
//!
std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }
//!
//! \brief Returns a vector of device buffers.
//!
const std::vector<void*>& getDeviceBindings() const {
return mDeviceBindings;
}
//!
//! \brief Returns the device buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getDeviceBuffer(const std::string& tensorName) const {
return getBuffer(false, tensorName);
}
//!
//! \brief Returns the host buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getHostBuffer(const std::string& tensorName) const {
return getBuffer(true, tensorName);
}
//!
//! \brief Returns the size of the host and device buffers that correspond to
//! tensorName.
//! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
//!
size_t size(const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return kINVALID_SIZE_VALUE;
return mManagedBuffers[index]->hostBuffer.nbBytes();
}
//!
//! \brief Dump host buffer with specified tensorName to ostream.
//! Prints error message to std::ostream if no such tensor can be
//! found.
//!
void dumpBuffer(std::ostream& os, const std::string& tensorName) {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1) {
os << "Invalid tensor name" << std::endl;
return;
}
void* buf = mManagedBuffers[index]->hostBuffer.data();
size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
size_t rowCount = static_cast<size_t>(
bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
int leadDim = mBatchSize;
int* trailDims = bufDims.d;
int nbDims = bufDims.nbDims;
// Fix explicit Dimension networks
if (!leadDim && nbDims > 0) {
leadDim = bufDims.d[0];
++trailDims;
--nbDims;
}
os << "[" << leadDim;
for (int i = 0; i < nbDims; i++)
os << ", " << trailDims[i];
os << "]" << std::endl;
switch (mEngine->getBindingDataType(index)) {
case nvinfer1::DataType::kINT32:
print<int32_t>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kFLOAT:
print<float>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kHALF:
print<half_float::half>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kINT8:
assert(0 && "Int8 network-level input and output is not supported");
break;
case nvinfer1::DataType::kBOOL:
assert(0 && "Bool network-level input and output are not supported");
break;
}
}
//!
//! \brief Templated print function that dumps buffers of arbitrary type to
//! std::ostream.
//! rowCount parameter controls how many elements are on each line.
//! A rowCount of 1 means that there is only 1 element on each line.
//!
template <typename T>
void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
assert(rowCount != 0);
assert(bufSize % sizeof(T) == 0);
T* typedBuf = static_cast<T*>(buf);
size_t numItems = bufSize / sizeof(T);
for (int i = 0; i < static_cast<int>(numItems); i++) {
// Handle rowCount == 1 case
if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
os << typedBuf[i] << std::endl;
else if (rowCount == 1)
os << typedBuf[i];
// Handle rowCount > 1 case
else if (i % rowCount == 0)
os << typedBuf[i];
else if (i % rowCount == rowCount - 1)
os << " " << typedBuf[i] << std::endl;
else
os << " " << typedBuf[i];
}
}
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! synchronously.
//!
void copyInputToDevice() { memcpyBuffers(true, false, false); }
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! synchronously.
//!
void copyOutputToHost() { memcpyBuffers(false, true, false); }
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! asynchronously.
//!
void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(true, false, true, stream);
}
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! asynchronously.
//!
void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(false, true, true, stream);
}
~BufferManager() = default;
private:
void* getBuffer(const bool isHost, const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return nullptr;
return (isHost ? mManagedBuffers[index]->hostBuffer.data()
: mManagedBuffers[index]->deviceBuffer.data());
}
void memcpyBuffers(const bool copyInput, const bool deviceToHost,
const bool async, const cudaStream_t& stream = 0) {
for (int i = 0; i < mEngine->getNbBindings(); i++) {
void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
: mManagedBuffers[i]->deviceBuffer.data();
const void* srcPtr = deviceToHost
? mManagedBuffers[i]->deviceBuffer.data()
: mManagedBuffers[i]->hostBuffer.data();
const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
const cudaMemcpyKind memcpyType =
deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
if ((copyInput && mEngine->bindingIsInput(i)) ||
(!copyInput && !mEngine->bindingIsInput(i))) {
if (async)
CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
else
CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
}
}
}
std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
std::vector<std::unique_ptr<ManagedBuffer>>
mManagedBuffers; //!< The vector of pointers to managed buffers
std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed
//! for engine execution
};
} // namespace samplesCommon
#endif // TENSORRT_BUFFERS_H
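A sketch of the host/device round trip BufferManager is built for, assuming an already-built engine and execution context with static shapes; the function name and the tensor names "input" and "output" are hypothetical:

// Sketch: copy inputs in, execute, copy outputs back.
void infer(std::shared_ptr<nvinfer1::ICudaEngine> engine,
           nvinfer1::IExecutionContext* context) {
  samplesCommon::BufferManager buffers(engine);
  float* hostInput = static_cast<float*>(buffers.getHostBuffer("input"));
  // ... fill hostInput with preprocessed data ...
  buffers.copyInputToDevice();
  bool status = context->executeV2(buffers.getDeviceBindings().data());
  buffers.copyOutputToHost();
  const float* hostOutput =
      static_cast<const float*>(buffers.getHostBuffer("output"));
  // ... check status, postprocess hostOutput ...
}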


@@ -1,844 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_COMMON_H
#define TENSORRT_COMMON_H
// For loadLibrary
#ifdef _MSC_VER
// Needed so that the max/min definitions in windows.h do not conflict with
// std::max/min.
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX
#else
#include <dlfcn.h>
#endif
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ratio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "safeCommon.h"
using namespace nvinfer1;
using namespace plugin;
#ifdef _MSC_VER
#define FN_NAME __FUNCTION__
#else
#define FN_NAME __func__
#endif
#if defined(__aarch64__) || defined(__QNX__)
#define ENABLE_DLA_API 1
#endif
#define CHECK_RETURN_W_MSG(status, val, errMsg) \
do { \
if (!(status)) { \
sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
<< FN_NAME << "(), line " << __LINE__ << std::endl; \
return val; \
} \
} while (0)
#undef ASSERT
#define ASSERT(condition) \
do { \
if (!(condition)) { \
sample::gLogError << "Assertion failure: " << #condition << std::endl; \
abort(); \
} \
} while (0)
#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A * t)>
template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
auto deleter = [](T* t) { t->destroy(); };
return std::unique_ptr<T, decltype(deleter)>{static_cast<T*>(t), deleter};
}
constexpr long double operator"" _GiB(long double val) {
return val * (1 << 30);
}
constexpr long double operator"" _MiB(long double val) {
return val * (1 << 20);
}
constexpr long double operator"" _KiB(long double val) {
return val * (1 << 10);
}
// These are necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
// Since the return type is signed, -1_GiB will work as expected.
constexpr long long int operator"" _GiB(unsigned long long val) {
return val * (1 << 30);
}
constexpr long long int operator"" _MiB(unsigned long long val) {
return val * (1 << 20);
}
constexpr long long int operator"" _KiB(unsigned long long val) {
return val * (1 << 10);
}
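// Example: 256_MiB == 268435456; handy wherever a byte count is expected.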
struct SimpleProfiler : public nvinfer1::IProfiler {
struct Record {
float time{0};
int count{0};
};
virtual void reportLayerTime(const char* layerName, float ms) noexcept {
mProfile[layerName].count++;
mProfile[layerName].time += ms;
if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) ==
mLayerNames.end()) {
mLayerNames.push_back(layerName);
}
}
SimpleProfiler(const char* name,
const std::vector<SimpleProfiler>& srcProfilers =
std::vector<SimpleProfiler>())
: mName(name) {
for (const auto& srcProfiler : srcProfilers) {
for (const auto& rec : srcProfiler.mProfile) {
auto it = mProfile.find(rec.first);
if (it == mProfile.end()) {
mProfile.insert(rec);
} else {
it->second.time += rec.second.time;
it->second.count += rec.second.count;
}
}
}
}
friend std::ostream& operator<<(std::ostream& out,
const SimpleProfiler& value) {
out << "========== " << value.mName << " profile ==========" << std::endl;
float totalTime = 0;
std::string layerNameStr = "TensorRT layer name";
int maxLayerNameLength =
std::max(static_cast<int>(layerNameStr.size()), 70);
for (const auto& elem : value.mProfile) {
totalTime += elem.second.time;
maxLayerNameLength =
std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
}
auto old_settings = out.flags();
auto old_precision = out.precision();
// Output header
{
out << std::setw(maxLayerNameLength) << layerNameStr << " ";
out << std::setw(12) << "Runtime, "
<< "%"
<< " ";
out << std::setw(12) << "Invocations"
<< " ";
out << std::setw(12) << "Runtime, ms" << std::endl;
}
for (size_t i = 0; i < value.mLayerNames.size(); i++) {
const std::string layerName = value.mLayerNames[i];
auto elem = value.mProfile.at(layerName);
out << std::setw(maxLayerNameLength) << layerName << " ";
out << std::setw(12) << std::fixed << std::setprecision(1)
<< (elem.time * 100.0F / totalTime) << "%"
<< " ";
out << std::setw(12) << elem.count << " ";
out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time
<< std::endl;
}
out.flags(old_settings);
out.precision(old_precision);
out << "========== " << value.mName << " total runtime = " << totalTime
<< " ms ==========" << std::endl;
return out;
}
private:
std::string mName;
std::vector<std::string> mLayerNames;
std::map<std::string, Record> mProfile;
};
//! Locate path to file, given its filename or filepath suffix and possible dirs
//! it might lie in.
//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a
//! file path.
inline std::string locateFile(const std::string& filepathSuffix,
const std::vector<std::string>& directories,
bool reportError = true) {
const int MAX_DEPTH{10};
bool found{false};
std::string filepath;
for (auto& dir : directories) {
if (!dir.empty() && dir.back() != '/') {
#ifdef _MSC_VER
filepath = dir + "\\" + filepathSuffix;
#else
filepath = dir + "/" + filepathSuffix;
#endif
} else {
filepath = dir + filepathSuffix;
}
for (int i = 0; i < MAX_DEPTH && !found; i++) {
const std::ifstream checkFile(filepath);
found = checkFile.is_open();
if (found) {
break;
}
filepath = "../" + filepath; // Try again in parent dir
}
if (found) {
break;
}
filepath.clear();
}
// Could not find the file
if (filepath.empty()) {
const std::string dirList = std::accumulate(
directories.begin() + 1, directories.end(), directories.front(),
[](const std::string& a, const std::string& b) {
return a + "\n\t" + b;
});
std::cout << "Could not find " << filepathSuffix
<< " in data directories:\n\t" << dirList << std::endl;
if (reportError) {
std::cout << "&&&& FAILED" << std::endl;
exit(EXIT_FAILURE);
}
}
return filepath;
}
inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH,
int inW) {
std::ifstream infile(fileName, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
std::string magic, h, w, max;
infile >> magic >> h >> w >> max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(buffer), inH * inW);
}
namespace samplesCommon {
// Swaps endianness of an integral type.
template <typename T,
typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T swapEndianness(const T& value) {
uint8_t bytes[sizeof(T)];
for (int i = 0; i < static_cast<int>(sizeof(T)); ++i) {
bytes[sizeof(T) - 1 - i] = *(reinterpret_cast<const uint8_t*>(&value) + i);
}
return *reinterpret_cast<T*>(bytes);
}
class HostMemory {
public:
HostMemory() = delete;
virtual void* data() const noexcept { return mData; }
virtual std::size_t size() const noexcept { return mSize; }
virtual DataType type() const noexcept { return mType; }
virtual ~HostMemory() {}
protected:
HostMemory(std::size_t size, DataType type)
: mData{nullptr}, mSize(size), mType(type) {}
void* mData;
std::size_t mSize;
DataType mType;
};
template <typename ElemType, DataType dataType>
class TypedHostMemory : public HostMemory {
public:
explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) {
mData = new ElemType[size];
};
~TypedHostMemory() noexcept { delete[] static_cast<ElemType*>(mData); }
ElemType* raw() noexcept { return static_cast<ElemType*>(data()); }
};
using FloatMemory = TypedHostMemory<float, DataType::kFLOAT>;
using HalfMemory = TypedHostMemory<uint16_t, DataType::kHALF>;
using ByteMemory = TypedHostMemory<uint8_t, DataType::kINT8>;
inline void* safeCudaMalloc(size_t memSize) {
void* deviceMem;
CHECK(cudaMalloc(&deviceMem, memSize));
if (deviceMem == nullptr) {
std::cerr << "Out of memory" << std::endl;
exit(1);
}
return deviceMem;
}
inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); }
struct InferDeleter {
template <typename T> void operator()(T* obj) const { delete obj; }
};
template <typename T> using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;
static auto StreamDeleter = [](cudaStream_t* pStream) {
if (pStream) {
cudaStreamDestroy(*pStream);
delete pStream;
}
};
inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream() {
std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(
new cudaStream_t, StreamDeleter);
if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) !=
cudaSuccess) {
pStream.reset(nullptr);
}
return pStream;
}
//! Return vector of indices that puts magnitudes of sequence in descending
//! order.
template <class Iter>
std::vector<size_t> argMagnitudeSort(Iter begin, Iter end) {
std::vector<size_t> indices(end - begin);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) {
return std::abs(begin[j]) < std::abs(begin[i]);
});
return indices;
}
inline bool readReferenceFile(const std::string& fileName,
std::vector<std::string>& refVector) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR: readReferenceFile: Attempting to read from a file "
"that is not open."
<< std::endl;
return false;
}
std::string line;
while (std::getline(infile, line)) {
if (line.empty())
continue;
refVector.push_back(line);
}
infile.close();
return true;
}
template <typename T>
std::vector<std::string> classify(const std::vector<std::string>& refVector,
const std::vector<T>& output,
const size_t topK) {
const auto inds =
samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
std::vector<std::string> result;
result.reserve(topK);
for (size_t k = 0; k < topK; ++k) {
result.push_back(refVector[inds[k]]);
}
return result;
}
// Returns indices of highest K magnitudes in v.
template <typename T>
std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k) {
std::vector<size_t> indices =
samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
indices.resize(k);
return indices;
}
template <typename T>
bool readASCIIFile(const std::string& fileName, const size_t size,
std::vector<T>& out) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR readASCIIFile: Attempting to read from a file that is "
"not open."
<< std::endl;
return false;
}
out.clear();
out.reserve(size);
out.assign(std::istream_iterator<T>(infile), std::istream_iterator<T>());
infile.close();
return true;
}
template <typename T>
bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in) {
std::ofstream outfile(fileName);
if (!outfile.is_open()) {
std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is "
"not open."
<< std::endl;
return false;
}
for (auto fn : in) {
outfile << fn << "\n";
}
outfile.close();
return true;
}
inline void print_version() {
std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "."
<< NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "."
<< NV_TENSORRT_BUILD << std::endl;
}
inline std::string getFileType(const std::string& filepath) {
return filepath.substr(filepath.find_last_of(".") + 1);
}
inline std::string toLower(const std::string& inp) {
std::string out = inp;
std::transform(out.begin(), out.end(), out.begin(), ::tolower);
return out;
}
inline float getMaxValue(const float* buffer, int64_t size) {
assert(buffer != nullptr);
assert(size > 0);
return *std::max_element(buffer, buffer + size);
}
// Ensures that every tensor used by a network has a dynamic range set.
//
// All tensors in a network must have a dynamic range specified if a calibrator
// is not used.
// This function is just a utility to globally fill in missing scales and
// zero-points for the entire network.
//
// If a tensor does not have a dynamic range set, it is assigned inRange or
// outRange as follows:
//
// * If the tensor is the input to a layer or output of a pooling node, its
// dynamic range is derived from inRange.
// * Otherwise its dynamic range is derived from outRange.
//
// The default parameter values are intended to demonstrate, for final layers
// in the network, cases where dynamic ranges are asymmetric.
//
// The default parameter values were chosen arbitrarily. Range values should be
// chosen such that we avoid underflow or overflow. Also, range values should
// be non-zero to avoid a uniform zero-scale tensor.
inline void setAllDynamicRanges(INetworkDefinition* network,
float inRange = 2.0f, float outRange = 4.0f) {
// Ensure that all layer inputs have a scale.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbInputs(); j++) {
ITensor* input{layer->getInput(j)};
// Optional inputs are nullptr here and are from RNN layers.
if (input != nullptr && !input->dynamicRangeIsSet()) {
ASSERT(input->setDynamicRange(-inRange, inRange));
}
}
}
// Ensure that all layer outputs have a scale.
// Tensors that are also inputs to layers are ignored here
// since the previous loop nest assigned scales to them.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbOutputs(); j++) {
ITensor* output{layer->getOutput(j)};
// Optional outputs are nullptr here and are from RNN layers.
if (output != nullptr && !output->dynamicRangeIsSet()) {
// Pooling must have the same input and output scales.
if (layer->getType() == LayerType::kPOOLING) {
ASSERT(output->setDynamicRange(-inRange, inRange));
} else {
ASSERT(output->setDynamicRange(-outRange, outRange));
}
}
}
}
}
inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
INetworkDefinition* n) {
// Set dummy per-tensor dynamic range if Int8 mode is requested.
if (c->getFlag(BuilderFlag::kINT8)) {
sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
"per-tensor dynamic range. Int8 accuracy is not "
"guaranteed."
<< std::endl;
setAllDynamicRanges(n);
}
}
inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
bool allowGPUFallback = true) {
if (useDLACore >= 0) {
if (builder->getNbDLACores() == 0) {
std::cerr << "Trying to use DLA core " << useDLACore
<< " on a platform that doesn't have any DLA cores"
<< std::endl;
assert(
"Error: use DLA core on a platfrom that doesn't have any DLA cores" &&
false);
}
if (allowGPUFallback) {
config->setFlag(BuilderFlag::kGPU_FALLBACK);
}
if (!config->getFlag(BuilderFlag::kINT8)) {
// User has not requested INT8 Mode.
// By default run in FP16 mode. FP32 mode is not permitted.
config->setFlag(BuilderFlag::kFP16);
}
config->setDefaultDeviceType(DeviceType::kDLA);
config->setDLACore(useDLACore);
}
}
inline int32_t parseDLA(int32_t argc, char** argv) {
for (int32_t i = 1; i < argc; i++) {
if (strncmp(argv[i], "--useDLACore=", 13) == 0) {
return std::stoi(argv[i] + 13);
}
}
return -1;
}
inline uint32_t getElementSize(nvinfer1::DataType t) noexcept {
switch (t) {
case nvinfer1::DataType::kINT32:
return 4;
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
inline int64_t volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, // int64_t init avoids 32-bit overflow
std::multiplies<int64_t>());
}
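// Example: volume(Dims{3, {2, 3, 4}}) == 24 elements; with kFLOAT that is
// 24 * getElementSize(kFLOAT) == 96 bytes.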
template <int C, int H, int W> struct PPM {
std::string magic, fileName;
int h, w, max;
uint8_t buffer[C * H * W];
};
// vPPM (variable-sized PPM) struct with runtime-sized dimensions.
struct vPPM {
std::string magic, fileName;
int h, w, max;
std::vector<uint8_t> buffer;
};
struct BBox {
float x1, y1, x2, y2;
};
template <int C, int H, int W>
void readPPMFile(const std::string& filename,
samplesCommon::PPM<C, H, W>& ppm) {
ppm.fileName = filename;
std::ifstream infile(filename, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void readPPMFile(const std::string& filename, vPPM& ppm,
std::vector<std::string>& input_dir) {
ppm.fileName = filename;
std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
for (int i = 0; i < ppm.w * ppm.h * 3; ++i) {
ppm.buffer.push_back(0);
}
infile.read(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
template <int C, int H, int W>
void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm,
const BBox& bbox) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
const int x1 = std::min(std::max(0, round(bbox.x1)), W - 1);
const int x2 = std::min(std::max(0, round(bbox.x2)), W - 1);
const int y1 = std::min(std::max(0, round(bbox.y1)), H - 1);
const int y2 = std::min(std::max(0, round(bbox.y2)), H - 1);
for (int x = x1; x <= x2; ++x) {
// bbox top border
ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
}
for (int y = y1; y <= y2; ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + x1) * 3] = 255;
ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + x2) * 3] = 255;
ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
}
outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm,
std::vector<BBox>& dets) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
for (auto bbox : dets) {
for (int x = int(bbox.x1); x < int(bbox.x2); ++x) {
// bbox top border
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0;
}
for (int y = int(bbox.y1); y < int(bbox.y2); ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0;
}
}
outfile.write(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
class TimerBase {
public:
virtual void start() {}
virtual void stop() {}
float microseconds() const noexcept { return mMs * 1000.f; }
float milliseconds() const noexcept { return mMs; }
float seconds() const noexcept { return mMs / 1000.f; }
void reset() noexcept { mMs = 0.f; }
protected:
float mMs{0.0f};
};
class GpuTimer : public TimerBase {
public:
explicit GpuTimer(cudaStream_t stream) : mStream(stream) {
CHECK(cudaEventCreate(&mStart));
CHECK(cudaEventCreate(&mStop));
}
~GpuTimer() {
CHECK(cudaEventDestroy(mStart));
CHECK(cudaEventDestroy(mStop));
}
void start() { CHECK(cudaEventRecord(mStart, mStream)); }
void stop() {
CHECK(cudaEventRecord(mStop, mStream));
float ms{0.0f};
CHECK(cudaEventSynchronize(mStop));
CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
mMs += ms;
}
private:
cudaEvent_t mStart, mStop;
cudaStream_t mStream;
}; // class GpuTimer
template <typename Clock> class CpuTimer : public TimerBase {
public:
using clock_type = Clock;
void start() { mStart = Clock::now(); }
void stop() {
mStop = Clock::now();
mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
}
private:
std::chrono::time_point<Clock> mStart, mStop;
}; // class CpuTimer
using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
inline std::vector<std::string> splitString(std::string str,
char delimiter = ',') {
std::vector<std::string> splitVect;
std::stringstream ss(str);
std::string substr;
while (ss.good()) {
getline(ss, substr, delimiter);
splitVect.emplace_back(std::move(substr));
}
return splitVect;
}
// Return m rounded up to nearest multiple of n
inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; }
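// Example: roundUp(13, 8) == 16.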
inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; }
inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; }
inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; }
inline void loadLibrary(const std::string& path) {
#ifdef _MSC_VER
void* handle = LoadLibrary(path.c_str());
#else
int32_t flags{RTLD_LAZY};
#if ENABLE_ASAN
// https://github.com/google/sanitizers/issues/89
// asan doesn't handle module unloading correctly and there are no plans on
// doing
// so. In order to get proper stack traces, don't delete the shared library on
// close so that asan can resolve the symbols correctly.
flags |= RTLD_NODELETE;
#endif // ENABLE_ASAN
void* handle = dlopen(path.c_str(), flags);
#endif
if (handle == nullptr) {
#ifdef _MSC_VER
sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
sample::gLogError << "Could not load plugin library: " << path
<< ", due to: " << dlerror() << std::endl;
#endif
}
}
inline int32_t getSMVersion() {
int32_t deviceIndex = 0;
CHECK(cudaGetDevice(&deviceIndex));
int32_t major, minor;
CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
deviceIndex));
CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
deviceIndex));
return ((major << 8) | minor);
}
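// Example (illustrative sketch, not part of the original header): decoding
// the value packed by getSMVersion(). On an SM 8.0 device this prints "8.0".
//
// const int32_t sm = getSMVersion();
// std::cout << (sm >> 8) << "." << (sm & 0xFF) << std::endl;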
inline bool isSMSafe() {
const int32_t smVersion = getSMVersion();
return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 ||
smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807;
}
inline bool isDataTypeSupported(DataType dataType) {
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder) {
return false;
}
if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) ||
(dataType == DataType::kHALF && !builder->platformHasFastFp16())) {
return false;
}
return true;
}
} // namespace samplesCommon
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
os << "(";
for (int i = 0; i < dims.nbDims; ++i) {
os << (i ? ", " : "") << dims.d[i];
}
return os << ")";
}
#endif // TENSORRT_COMMON_H


@@ -1,223 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "getOptions.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <set>
namespace nvinfer1 {
namespace utility {
//! Matching for TRTOptions is defined as follows:
//!
//! If A and B both have longName set, A matches B if and only if A.longName ==
//! B.longName and (A.shortName == B.shortName if both have short name set).
//!
//! If A only has shortName set and B only has longName set, then A does not
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
//! the definition of a TRTOption in the input to getOptions. As such, if the
//! definition only has shortName set, it will never be equal to a TRTOption
//! that does not have shortName set (and same for longName).
//!
//! If A and B both have shortName set but B does not have longName set, A
//! matches B if and only if A.shortName == B.shortName.
//!
//! If A has neither long or short name set, A matches B if and only if B has
//! neither long or short name set.
bool matches(const TRTOption& a, const TRTOption& b) {
if (!a.longName.empty() && !b.longName.empty()) {
if (a.shortName && b.shortName) {
return (a.longName == b.longName) && (a.shortName == b.shortName);
}
return a.longName == b.longName;
}
// If exactly one of them has shortName set, the comparison below is false
// anyway.
return a.shortName == b.shortName;
}
//! getTRTOptionIndex returns the index of a TRTOption in a vector of
//! TRTOptions, -1 if not found.
int getTRTOptionIndex(const std::vector<TRTOption>& options,
const TRTOption& opt) {
for (size_t i = 0; i < options.size(); ++i) {
if (matches(opt, options[i])) {
return static_cast<int>(i);
}
}
return -1;
}
//! validateTRTOption will return a string containing an error message if the
//! option's names contain non-alphanumeric characters, or if there are
//! duplicate option names found. Otherwise, returns the empty string.
std::string validateTRTOption(const std::set<char>& seenShortNames,
const std::set<std::string>& seenLongNames,
const TRTOption& opt) {
if (opt.shortName != 0) {
if (!std::isalnum(opt.shortName)) {
// Note: std::to_string(char) would print the character's numeric code,
// so build a one-character string instead.
return "Short name '" + std::string(1, opt.shortName) +
"' is non-alphanumeric";
}
if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
return "Short name '" + std::string(1, opt.shortName) +
"' is a duplicate";
}
}
if (!opt.longName.empty()) {
for (const char& c : opt.longName) {
if (!std::isalnum(c) && c != '-' && c != '_') {
return "Long name '" + opt.longName +
"' contains characters that are not '-', '_', or alphanumeric";
}
}
if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
return "Long name '" + opt.longName + "' is a duplicate";
}
}
return "";
}
//! validateTRTOptions will return a string containing an error message if any
//! options contain non-alphanumeric characters, or if there are duplicate
//! option names found. Otherwise, returns the empty string.
std::string validateTRTOptions(const std::vector<TRTOption>& options) {
std::set<char> seenShortNames;
std::set<std::string> seenLongNames;
for (size_t i = 0; i < options.size(); ++i) {
const std::string errMsg =
validateTRTOption(seenShortNames, seenLongNames, options[i]);
if (!errMsg.empty()) {
return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
}
seenShortNames.insert(options[i].shortName);
seenLongNames.insert(options[i].longName);
}
return "";
}
//! parseArgs parses an argument list and returns a TRTParsedArgs with the
//! fields set accordingly. Assumes that options is validated.
//! ErrMsg will be set if:
//! - an argument is null
//! - an argument is empty
//! - an argument does not have option (i.e. "-" and "--")
//! - a short argument has more than 1 character
//! - the last argument in the list requires a value
TRTParsedArgs parseArgs(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
TRTParsedArgs parsedArgs;
parsedArgs.values.resize(options.size());
for (int i = 1; i < argc; ++i) // index of current command-line argument
{
if (argv[i] == nullptr) {
return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
}
const std::string argStr(argv[i]);
if (argStr.empty()) {
return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
}
// No starting hyphen means it is a positional argument
if (argStr[0] != '-') {
parsedArgs.positionalArgs.push_back(argStr);
continue;
}
if (argStr == "-" || argStr == "--") {
return TRTParsedArgs{"Argument does not specify an option at index " +
std::to_string(i)};
}
// If only 1 hyphen, char after is the flag.
TRTOption opt{' ', "", false, ""};
std::string value;
if (argStr[1] != '-') {
// Must only have 1 char after the hyphen
if (argStr.size() > 2) {
return TRTParsedArgs{
"Short arg contains more than 1 character at index " +
std::to_string(i)};
}
opt.shortName = argStr[1];
} else {
opt.longName = argStr.substr(2);
// We need to support --foo=bar syntax, so look for '='
const size_t eqIndex = opt.longName.find('=');
if (eqIndex < opt.longName.size()) {
value = opt.longName.substr(eqIndex + 1);
opt.longName = opt.longName.substr(0, eqIndex);
}
}
const int idx = getTRTOptionIndex(options, opt);
if (idx < 0) {
continue;
}
if (options[idx].valueRequired) {
if (!value.empty()) {
parsedArgs.values[idx].second.push_back(value);
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
continue;
}
if (i + 1 >= argc) {
return TRTParsedArgs{"Last argument requires value, but none given"};
}
const std::string nextArg(argv[i + 1]);
if (nextArg.size() >= 1 && nextArg[0] == '-') {
sample::gLogWarning << "Warning: Using '" << nextArg
<< "' as a value for '" << argStr
<< "', Should this be its own flag?" << std::endl;
}
parsedArgs.values[idx].second.push_back(nextArg);
i += 1; // Next argument already consumed
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
} else {
parsedArgs.values[idx].first += 1;
}
}
return parsedArgs;
}
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
const std::string errMsg = validateTRTOptions(options);
if (!errMsg.empty()) {
return TRTParsedArgs{errMsg};
}
return parseArgs(argc, argv, options);
}
} // namespace utility
} // namespace nvinfer1


@@ -1,128 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_GET_OPTIONS_H
#define TRT_GET_OPTIONS_H
#include <string>
#include <utility>
#include <vector>
namespace nvinfer1 {
namespace utility {
//! TRTOption defines a command line option. At least one of shortName and
//! longName must be defined.
//! valueRequired must be explicitly initialized, since an uninitialized bool
//! member would otherwise have an indeterminate value.
//! helpText is optional.
struct TRTOption {
char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b)
std::string longName; //!< Option name in long (double hyphen) form (i.e.
//!--foo, --bar)
bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4,
//!--foo bar)
std::string helpText; //!< Text to show when printing out the command usage
};
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
//! argument list (argv).
//!
//! errMsg is a string containing an error message if any errors occurred. If it
//! is empty, no errors occurred.
//!
//! values stores a vector of pairs for each option (ordered by order in the
//! input). Each pair contains an int (the number of occurrences) and a vector
//! of strings (a list of values). The user should know which of these to use,
//! and which options required values. For non-value options, only the
//! occurrence count is populated. For value-required options, occurrences ==
//! # of values. Values do not need to be unique.
//!
//! positionalArgs stores additional arguments that are passed in without an
//! option (these must not start with a hyphen).
struct TRTParsedArgs {
std::string errMsg;
std::vector<std::pair<int, std::vector<std::string>>> values;
std::vector<std::string> positionalArgs;
};
//! Parse the input arguments passed to main() and extract options as well as
//! positional arguments.
//!
//! Options are supposed to be passed to main() with a preceding hyphen '-'.
//!
//! If there is a single preceding hyphen, there should be exactly 1 character
//! after the hyphen, which is interpreted as the option.
//!
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
//! is interpreted as the option.
//!
//! If the option requires a value, the next argument is used as the value.
//!
//! Positional arguments must not start with a hyphen.
//!
//! If an argument requires a value, the next argument is interpreted as the
//! value, even if it is in the form of a valid option (i.e. --foo --bar will
//! store "--bar" as a value for option "foo" if "foo" requires a value).
//! We also support --name=value syntax. In this case, 'value' would be used as
//! the value, NOT the next argument.
//!
//! For options:
//! { { 'a', "", false },
//! { 'b', "", false },
//! { 0, "cee", false },
//! { 'd', "", true },
//! { 'e', "", true },
//! { 'f', "foo", true } }
//!
//! ./main hello world -a -a --cee -d 12 -f 34
//! and
//! ./main hello world -a -a --cee -d 12 --foo 34
//!
//! will result in:
//!
//! TRTParsedArgs {
//! errMsg: "",
//! values: { { 2, {} },
//! { 0, {} },
//! { 1, {} },
//! { 1, {"12"} },
//! { 0, {} },
//! { 1, {"34"} } }
//! positionalArgs: {"hello", "world"},
//! }
//!
//! Non-POSIX behavior:
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
//! option must have its own hyphen prefix.
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
//! whitespace-separated from the option they belong to.
//!
//! @param[in] argc The number of arguments passed to main (including the
//! file name, which is disregarded)
//! @param[in] argv The arguments passed to main (including the file name,
//! which is disregarded)
//! @param[in] options List of TRTOptions to parse
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
//! the fields.
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options);
} // namespace utility
} // namespace nvinfer1
#endif // TRT_GET_OPTIONS_H
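// Example usage (illustrative sketch, not part of the original header): a
// minimal main() driving getOptions(). The option names and the error
// handling are placeholders.
//
// int main(int argc, char** argv) {
//   using nvinfer1::utility::TRTOption;
//   using nvinfer1::utility::TRTParsedArgs;
//   const std::vector<TRTOption> options{
//       {'v', "verbose", false, "enable verbose output"},
//       {'b', "batch", true, "batch size"}};
//   TRTParsedArgs args = nvinfer1::utility::getOptions(argc, argv, options);
//   if (!args.errMsg.empty()) { /* report args.errMsg and exit */ }
//   const bool verbose = args.values[0].first > 0;
//   const std::string batch =
//       args.values[1].second.empty() ? "1" : args.values[1].second.back();
//   return 0;
// }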

File diff suppressed because it is too large


@@ -1,38 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "logger.h"
#include "ErrorRecorder.h"
#include "logging.h"
SampleErrorRecorder gRecorder;
namespace sample {
Logger gLogger{Logger::Severity::kINFO};
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
void setReportableSeverity(Logger::Severity severity) {
gLogger.setReportableSeverity(severity);
gLogVerbose.setReportableSeverity(severity);
gLogInfo.setReportableSeverity(severity);
gLogWarning.setReportableSeverity(severity);
gLogError.setReportableSeverity(severity);
gLogFatal.setReportableSeverity(severity);
}
} // namespace sample


@@ -1,35 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include "logging.h"
class SampleErrorRecorder;
extern SampleErrorRecorder gRecorder;
namespace sample {
extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;
void setReportableSeverity(Logger::Severity severity);
} // namespace sample
#endif // LOGGER_H


@@ -1,573 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include "sampleOptions.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ostream>
#include <sstream>
#include <string>
namespace sample {
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf {
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
: mOutput(other.mOutput), mPrefix(other.mPrefix),
mShouldLog(other.mShouldLog) {}
LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
LogStreamConsumerBuffer() = delete;
LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
~LogStreamConsumerBuffer() override {
// std::streambuf::pbase() gives a pointer to the beginning of the buffered
// part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the
// output sequence
// if the pointer to the beginning is not equal to the pointer to the
// current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr()) {
putOutput();
}
}
//!
//! synchronizes the stream buffer and returns 0 on success
//! synchronizing the stream buffer consists of inserting the buffer contents
//! into the stream,
//! resetting the buffer and flushing the stream
//!
int32_t sync() override {
putOutput();
return 0;
}
void putOutput() {
if (mShouldLog) {
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
mOutput << "[";
mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
<< "/";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
<< "-";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents, prefixed appropriately, into the stream
mOutput << mPrefix << str();
}
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog{};
}; // class LogStreamConsumerBuffer
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
//! std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase {
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mBuffer(stream, prefix, shouldLog) {}
protected:
std::mutex mLogMutex;
LogStreamConsumerBuffer mBuffer;
}; // class LogStreamConsumerBase
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when
//! logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the
//! LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to
//! std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from
//! being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
public:
//!
//! \brief Creates a LogStreamConsumer which logs messages with level
//! severity.
//! Reportable severity determines if the messages are severe enough to be
//! logged.
//!
LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
nvinfer1::ILogger::Severity severity)
: LogStreamConsumerBase(severityOstream(severity),
severityPrefix(severity),
severity <= reportableSeverity),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
LogStreamConsumer(LogStreamConsumer&& other) noexcept
: LogStreamConsumerBase(severityOstream(other.mSeverity),
severityPrefix(other.mSeverity),
other.mShouldLog),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
LogStreamConsumer(const LogStreamConsumer& other) = delete;
LogStreamConsumer() = delete;
~LogStreamConsumer() = default;
LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
void setReportableSeverity(Severity reportableSeverity) {
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
std::mutex& getMutex() { return mLogMutex; }
bool getShouldLog() const { return mShouldLog; }
private:
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
bool mShouldLog;
Severity mSeverity;
}; // class LogStreamConsumer
template <typename T>
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << obj;
}
return logger;
}
//!
//! Special handling std::endl
//!
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
std::ostream& (*f)(std::ostream&)) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << f;
}
return logger;
}
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
const nvinfer1::Dims& dims) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
}
return logger;
}
//!
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and
//! samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or
//! internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to
//! emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output
//! is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results
//! to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
//! duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits
//! directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between
//! messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to
//! access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger
//! implementation a member of the Logger
//! object.
//!
class Logger : public nvinfer1::ILogger {
public:
explicit Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity) {}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult {
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger
//! associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the
//! logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will
//! eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) noexcept override {
LogStreamConsumer(mReportableSeverity, severity)
<< "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of
//! this level or higher.
//!
void setReportableSeverity(Severity severity) noexcept {
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print
//! test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom
//! that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom {
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started), mName(name), mCmdline(cmdline) {}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting
//! with
//! "TensorRT" and containing dot-separated strings
//! containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name,
const std::string& cmdline) {
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an
//! array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, int32_t argc,
char const* const* argv) {
// Append TensorRT version as info
const std::string vname =
name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
auto cmdline = genCmdlineString(argc, argv);
return defineTest(vname, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom) {
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of
//! TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(TestAtom const& testAtom, TestResult result) {
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int32_t reportPass(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int32_t reportFail(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int32_t reportWaive(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int32_t reportTest(TestAtom const& testAtom, bool pass) {
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const { return mReportableSeverity; }
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the
//! given severity
//!
static const char* severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message
//! with the given result
//!
static const char* testResultString(TestResult result) {
switch (result) {
case TestResult::kRUNNING:
return "RUNNING";
case TestResult::kPASSED:
return "PASSED";
case TestResult::kFAILED:
return "FAILED";
case TestResult::kWAIVED:
return "WAIVED";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the
//! given severity
//!
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(TestAtom const& testAtom, TestResult result) {
severityOstream(Severity::kINFO)
<< "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int32_t argc, char const* const* argv) {
std::stringstream ss;
for (int32_t i = 0; i < argc; i++) {
if (i > 0) {
ss << " ";
}
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
}; // class Logger
namespace {
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINTERNAL_ERROR
//! ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(),
Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
} // namespace sample
#endif // TENSORRT_LOGGING_H
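// Example usage (illustrative sketch, not part of the original header):
// driving the Logger and the test-report helpers from a sample's main(). The
// test name and the work performed are placeholders.
//
// int main(int argc, char** argv) {
//   sample::Logger logger{sample::Logger::Severity::kINFO};
//   auto test =
//       sample::Logger::defineTest("TensorRT.sample_example", argc, argv);
//   sample::Logger::reportTestStart(test);
//   sample::LOG_INFO(logger) << "building engine" << std::endl;
//   const bool ok = true; // placeholder for the sample's real work
//   return sample::Logger::reportTest(test, ok);
// }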


@@ -1,126 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PARSER_ONNX_CONFIG_H
#define PARSER_ONNX_CONFIG_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
#include "NvOnnxParser.h"
#define ONNX_DEBUG 1
/**
* \class ParserOnnxConfig
* \brief Configuration Manager Class Concrete Implementation
*
* \note:
*
*/
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
protected:
std::string mModelFilename{};
std::string mTextFilename{};
std::string mFullTextFilename{};
nvinfer1::DataType mModelDtype;
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
bool mPrintLayercInfo;
public:
ParserOnnxConfig()
: mModelDtype(nvinfer1::DataType::kFLOAT),
mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
mPrintLayercInfo(false) {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~ParserOnnxConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
mModelDtype = modelDtype;
}
virtual nvinfer1::DataType getModelDtype() const noexcept {
return mModelDtype;
}
virtual const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
virtual void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = std::string(onnxFilename);
}
virtual nvonnxparser::IOnnxConfig::Verbosity
getVerbosityLevel() const noexcept {
return mVerbosity;
}
virtual void addVerbosity() noexcept { ++mVerbosity; }
virtual void reduceVerbosity() noexcept { --mVerbosity; }
virtual void
setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
mVerbosity = verbosity;
}
virtual const char* getTextFileName() const noexcept {
return mTextFilename.c_str();
}
virtual void setTextFileName(const char* textFilename) noexcept {
mTextFilename = std::string(textFilename);
}
virtual const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = std::string(fullTextFilename);
}
virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
virtual void setPrintLayerInfo(bool src) noexcept {
mPrintLayercInfo = src;
} //!< set the boolean variable corresponding to the layer info, see
//! getPrintLayerInfo()
virtual bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
virtual void destroy() noexcept { delete this; }
}; // class ParserOnnxConfig
#endif


@@ -1,65 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_SAFE_COMMON_H
#define TENSORRT_SAFE_COMMON_H
#include "NvInferRuntimeCommon.h"
#include <cstdlib>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#define CHECK(status) \
do { \
auto ret = (status); \
if (ret != 0) { \
std::cerr << "Cuda failure: " << ret << std::endl; \
abort(); \
} \
} while (0)
namespace samplesCommon {
template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
if (!obj) {
throw std::runtime_error("Failed to create object");
}
return std::shared_ptr<T>(obj);
}
inline uint32_t elementSize(nvinfer1::DataType t) {
switch (t) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kINT8:
return 1;
case nvinfer1::DataType::kBOOL:
return 1;
}
return 0;
}
template <typename A, typename B> inline A divUp(A x, B n) {
return (x + n - 1) / n;
}
} // namespace samplesCommon
#endif // TENSORRT_SAFE_COMMON_H
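// Example (illustrative sketch, not part of the original header): computing
// a buffer size with the helpers above. 1000 kFLOAT elements copied in
// 256-byte chunks need divUp(4000, 256) == 16 chunks.
//
// const uint32_t bytes =
//     1000 * samplesCommon::elementSize(nvinfer1::DataType::kFLOAT); // 4000
// const uint32_t chunks = samplesCommon::divUp(bytes, 256u);         // 16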


@@ -1,251 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SampleConfig_H
#define SampleConfig_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
class SampleConfig : public nvonnxparser::IOnnxConfig {
public:
enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
private:
std::string mModelFilename;
std::string mEngineFilename;
std::string mTextFilename;
std::string mFullTextFilename;
std::string mImageFilename;
std::string mReferenceFilename;
std::string mOutputFilename;
std::string mCalibrationFilename;
std::string mTimingCacheFilename;
int64_t mLabel{-1};
int64_t mMaxBatchSize{32};
int64_t mCalibBatchSize{0};
int64_t mMaxNCalibBatch{0};
int64_t mFirstCalibBatch{0};
int64_t mUseDLACore{-1};
nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
bool mTF32{true};
Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
bool mPrintLayercInfo{false};
bool mDebugBuilder{false};
InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
uint64_t mTopK{0};
float mFailurePercentage{-1.0f};
float mTolerance{0.0f};
float mAbsTolerance{1e-5f};
public:
SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "SampleConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
void setModelDtype(const nvinfer1::DataType mdt) noexcept {
mModelDtype = mdt;
}
nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
bool getTF32() const noexcept { return mTF32; }
void setTF32(bool enabled) noexcept { mTF32 = enabled; }
const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = std::string(onnxFilename);
}
Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
void addVerbosity() noexcept { ++mVerbosity; }
void reduceVerbosity() noexcept { --mVerbosity; }
virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
const char* getEngineFileName() const noexcept {
return mEngineFilename.c_str();
}
void setEngineFileName(const char* engineFilename) noexcept {
mEngineFilename = std::string(engineFilename);
}
const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
void setTextFileName(const char* textFilename) noexcept {
mTextFilename = std::string(textFilename);
}
const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = std::string(fullTextFilename);
}
void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label
int64_t getLabel() const noexcept { return mLabel; } //!< get the Label
bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
void setPrintLayerInfo(bool b) noexcept {
mPrintLayercInfo = b;
} //!< set the boolean variable corresponding to the layer info, see
//! getPrintLayerInfo()
void setMaxBatchSize(int64_t maxBatchSize) noexcept {
mMaxBatchSize = maxBatchSize;
} //!< set the Max Batch Size
int64_t getMaxBatchSize() const noexcept {
return mMaxBatchSize;
} //!< get the Max Batch Size
void setCalibBatchSize(int64_t CalibBatchSize) noexcept {
mCalibBatchSize = CalibBatchSize;
} //!< set the calibration batch size
int64_t getCalibBatchSize() const noexcept {
return mCalibBatchSize;
} //!< get calibration batch size
void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept {
mMaxNCalibBatch = MaxNCalibBatch;
} //!< set Max Number of Calibration Batches
int64_t getMaxNCalibBatch() const noexcept {
return mMaxNCalibBatch;
} //!< get the Max Number of Calibration Batches
void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept {
mFirstCalibBatch = FirstCalibBatch;
} //!< set the first calibration batch
int64_t getFirstCalibBatch() const noexcept {
return mFirstCalibBatch;
} //!< get the first calibration batch
void setUseDLACore(int64_t UseDLACore) noexcept {
mUseDLACore = UseDLACore;
} //!< set the DLA core to use
int64_t getUseDLACore() const noexcept {
return mUseDLACore;
} //!< get the DLA core to use
void setDebugBuilder() noexcept {
mDebugBuilder = true;
} //!< enable the Debug info, while building the engine.
bool getDebugBuilder() const noexcept {
return mDebugBuilder;
} //!< get the boolean variable, corresponding to the debug builder
const char*
getImageFileName() const noexcept //!< get the image file name (PPM or ASCII)
{
return mImageFilename.c_str();
}
void setImageFileName(
const char* imageFilename) noexcept //!< set the image file name
{
mImageFilename = std::string(imageFilename);
}
const char* getReferenceFileName() const noexcept {
return mReferenceFilename.c_str();
}
void setReferenceFileName(
const char* referenceFilename) noexcept //!< set reference file name
{
mReferenceFilename = std::string(referenceFilename);
}
void setInputDataFormat(InputDataFormat idt) noexcept {
mInputDataFormat = idt;
} //!< specifies expected data format of the image file (PPM or ASCII)
InputDataFormat getInputDataFormat() const noexcept {
return mInputDataFormat;
} //!< returns the expected data format of the image file.
const char* getOutputFileName()
const noexcept //!< get the file name used to save the results
{
return mOutputFilename.c_str();
}
void setOutputFileName(
const char* outputFilename) noexcept //!< set the output file name
{
mOutputFilename = std::string(outputFilename);
}
const char* getCalibrationFileName() const noexcept {
return mCalibrationFilename.c_str();
} //!< specifies the file containing the list of image files for int8
//! calibration
void setCalibrationFileName(
const char* calibrationFilename) noexcept //!< set the int8 calibration
//! list file name
{
mCalibrationFilename = std::string(calibrationFilename);
}
uint64_t getTopK() const noexcept { return mTopK; }
void setTopK(uint64_t topK) noexcept {
mTopK = topK;
} //!< If this option is specified, return the K top probabilities.
float getFailurePercentage() const noexcept { return mFailurePercentage; }
void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
float getTolerance() const noexcept { return mTolerance; }
void setTolerance(float t) noexcept { mTolerance = t; }
const char* getTimingCacheFilename() const noexcept {
return mTimingCacheFilename.c_str();
}
void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
mTimingCacheFilename = std::string(timingCacheFilename);
}
bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
void destroy() noexcept { delete this; }
}; // class SampleConfig
#endif


@@ -1,397 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_DEVICE_H
#define TRT_SAMPLE_DEVICE_H
#include <cassert>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <thread>
namespace sample {
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
if (ret != cudaSuccess) {
err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
abort();
}
}
class TrtCudaEvent;
namespace {
void cudaSleep(void* sleep) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
}
} // namespace
//!
//! \class TrtCudaStream
//! \brief Managed CUDA stream
//!
class TrtCudaStream {
public:
TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
TrtCudaStream(const TrtCudaStream&) = delete;
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
TrtCudaStream(TrtCudaStream&&) = delete;
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
cudaStream_t get() const { return mStream; }
void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
void wait(TrtCudaEvent& event);
void sleep(float* ms) {
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
}
private:
cudaStream_t mStream{};
};
//!
//! \class TrtCudaEvent
//! \brief Managed CUDA event
//!
class TrtCudaEvent {
public:
explicit TrtCudaEvent(bool blocking = true) {
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
}
TrtCudaEvent(const TrtCudaEvent&) = delete;
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
TrtCudaEvent(TrtCudaEvent&&) = delete;
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); }
cudaEvent_t get() const { return mEvent; }
void record(const TrtCudaStream& stream) {
cudaCheck(cudaEventRecord(mEvent, stream.get()));
}
void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); }
// Returns the elapsed time between the two events, in milliseconds
float operator-(const TrtCudaEvent& e) const {
float time{0};
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
return time;
}
private:
cudaEvent_t mEvent{};
};
inline void TrtCudaStream::wait(TrtCudaEvent& event) {
cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
}
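// Example (illustrative sketch, not part of the original header): timing a
// span of stream work with two events and the operator- defined above.
// enqueueWork() is a hypothetical placeholder for asynchronous work.
//
// TrtCudaStream stream;
// TrtCudaEvent begin, end;
// begin.record(stream);
// enqueueWork(stream.get()); // placeholder: asynchronous work on the stream
// end.record(stream);
// end.synchronize();         // both events must have completed
// const float ms = end - begin; // elapsed milliseconds between the events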
//!
//! \class TrtCudaGraph
//! \brief Managed CUDA graph
//!
class TrtCudaGraph {
public:
explicit TrtCudaGraph() = default;
TrtCudaGraph(const TrtCudaGraph&) = delete;
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
TrtCudaGraph(TrtCudaGraph&&) = delete;
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
~TrtCudaGraph() {
if (mGraphExec) {
cudaGraphExecDestroy(mGraphExec);
}
}
void beginCapture(TrtCudaStream& stream) {
cudaCheck(
cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
}
bool launch(TrtCudaStream& stream) {
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
}
void endCapture(TrtCudaStream& stream) {
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
cudaCheck(cudaGraphDestroy(mGraph));
}
void endCaptureOnError(TrtCudaStream& stream) {
// There are two possibilities why stream capture would fail:
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
// (2) TRT reports a failure.
// In case (1), the returning mGraph should be nullptr.
// In case (2), the returning mGraph is not nullptr, but it should not be
// used.
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
if (ret == cudaErrorStreamCaptureInvalidated) {
assert(mGraph == nullptr);
} else {
assert(ret == cudaSuccess);
assert(mGraph != nullptr);
cudaCheck(cudaGraphDestroy(mGraph));
mGraph = nullptr;
}
// Clean up any CUDA error.
cudaGetLastError();
sample::gLogWarning << "The CUDA graph capture on the stream has failed."
<< std::endl;
}
private:
cudaGraph_t mGraph{};
cudaGraphExec_t mGraphExec{};
};
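// Example (illustrative sketch, not part of the original header): capturing
// enqueued work into a CUDA graph and replaying it. enqueueWork() is a
// placeholder; real callers handle capture failure via endCaptureOnError().
//
// TrtCudaGraph graph;
// graph.beginCapture(stream);
// enqueueWork(stream.get()); // work is recorded, not executed, while capturing
// graph.endCapture(stream);  // instantiates the executable graph
// for (int i = 0; i < 10; ++i) {
//   graph.launch(stream);    // replays the captured work
// }
// stream.synchronize();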
//!
//! \class TrtCudaBuffer
//! \brief Managed buffer for host and device
//!
template <typename A, typename D> class TrtCudaBuffer {
public:
TrtCudaBuffer() = default;
TrtCudaBuffer(const TrtCudaBuffer&) = delete;
TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
TrtCudaBuffer(TrtCudaBuffer&& rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
if (this != &rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
return *this;
}
~TrtCudaBuffer() { reset(); }
TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
void allocate(size_t size) {
reset();
A()(&mPtr, size);
}
void reset(void* ptr = nullptr) {
if (mPtr) {
D()(mPtr);
}
mPtr = ptr;
}
void* get() const { return mPtr; }
private:
void* mPtr{nullptr};
};
struct DeviceAllocator {
void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
};
struct DeviceDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
};
struct ManagedAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocManaged(ptr, size));
}
};
struct HostAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocHost(ptr, size));
}
};
struct HostDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
};
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
//!
//! \class MirroredBuffer
//! \brief Coupled host and device buffers
//!
class IMirroredBuffer {
public:
//!
//! Allocate memory for the mirrored buffer given the size
//! of the allocation.
//!
virtual void allocate(size_t size) = 0;
//!
//! Get the pointer to the device side buffer.
//!
//! \return pointer to device memory or nullptr if uninitialized.
//!
virtual void* getDeviceBuffer() const = 0;
//!
//! Get the pointer to the host side buffer.
//!
//! \return pointer to host memory or nullptr if uninitialized.
//!
virtual void* getHostBuffer() const = 0;
//!
//! Copy the memory from host to device.
//!
virtual void hostToDevice(TrtCudaStream& stream) = 0;
//!
//! Copy the memory from device to host.
//!
virtual void deviceToHost(TrtCudaStream& stream) = 0;
//!
//! Interface to get the size of the memory
//!
//! \return the size of memory allocated.
//!
virtual size_t getSize() const = 0;
//!
//! Virtual destructor declaration
//!
virtual ~IMirroredBuffer() = default;
}; // class IMirroredBuffer
//!
//! Class to have a separate memory buffer for discrete device and host
//! allocations.
//!
class DiscreteMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mHostBuffer.allocate(size);
mDeviceBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
void* getHostBuffer() const { return mHostBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
cudaMemcpyHostToDevice, stream.get()));
}
void deviceToHost(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
cudaMemcpyDeviceToHost, stream.get()));
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtHostBuffer mHostBuffer;
TrtDeviceBuffer mDeviceBuffer;
}; // class DiscreteMirroredBuffer
//!
//! Class to have a unified memory buffer for embedded devices.
//!
class UnifiedMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mBuffer.get(); }
void* getHostBuffer() const { return mBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
void deviceToHost(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtManagedBuffer mBuffer;
}; // class UnifiedMirroredBuffer
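// Example (illustrative sketch, not part of the original header): filling a
// discrete mirrored buffer on the host and mirroring it to the device.
// Assumes <cstring> for std::memset.
//
// DiscreteMirroredBuffer buf;
// buf.allocate(1024);
// std::memset(buf.getHostBuffer(), 0, buf.getSize());
// TrtCudaStream stream;
// buf.hostToDevice(stream);
// stream.synchronize(); // the device copy is complete after this point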
inline void setCudaDevice(int device, std::ostream& os) {
cudaCheck(cudaSetDevice(device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// clang-format off
os << "=== Device Information ===" << std::endl;
os << "Selected Device: " << properties.name << std::endl;
os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
os << "SMs: " << properties.multiProcessorCount << std::endl;
os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
<< " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
// clang-format on
}
} // namespace sample
#endif // TRT_SAMPLE_DEVICE_H

File diff suppressed because it is too large


@@ -1,195 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_ENGINES_H
#define TRT_SAMPLE_ENGINES_H
#include <iostream>
#include <vector>
//#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
struct Parser {
// TrtUniquePtr<nvcaffeparser1::ICaffeParser> caffeParser;
TrtUniquePtr<nvonnxparser::IParser> onnxParser;
operator bool() const { return static_cast<bool>(onnxParser); }
};
struct BuildEnvironment {
TrtUniquePtr<INetworkDefinition> network;
//! Parser that creates the network. Must be declared *after* network, so
//! that when ~BuildEnvironment() executes, the parser is destroyed before
//! the network is destroyed.
Parser parser;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<uint8_t> engineBlob;
};
//!
//! \brief Generate a network definition for a given model
//!
//! \return Parser The parser used to initialize the network and that holds the
//! weights for the network, or an invalid
//! parser (the returned parser converts to false if tested)
//!
//! Constant input dimensions in the model must not be changed in the
//! corresponding network definition, because its correctness may rely on the
//! constants.
//!
//! \see Parser::operator bool()
//!
Parser modelToNetwork(const ModelOptions& model,
nvinfer1::INetworkDefinition& network, std::ostream& err);
//!
//! \brief Set up network and config
//!
//! \return boolean Return true if network and config were successfully set
//!
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
IBuilder& builder, INetworkDefinition& network,
IBuilderConfig& config, std::ostream& err,
std::vector<std::vector<char>>& sparseWeights);
//!
//! \brief Log refittable layers and weights of a refittable engine
//!
void dumpRefittable(nvinfer1::ICudaEngine& engine);
//!
//! \brief Load a serialized engine
//!
//! \return Pointer to the engine loaded or nullptr if the operation failed
//!
nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore,
std::ostream& err);
//!
//! \brief Save an engine into a file
//!
//! \return boolean Return true if the engine was successfully saved
//!
bool saveEngine(const nvinfer1::ICudaEngine& engine,
const std::string& fileName, std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return boolean Return true if the engine build environment was
//! successfully created
//!
bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, BuildEnvironment& env,
std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return Pointer to the engine created or nullptr if the creation failed
//!
inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys,
std::ostream& err) {
BuildEnvironment env;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
if (getEngineBuildEnv(model, build, sys, env, err)) {
engine.swap(env.engine);
}
return engine;
}
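//! Example usage of getEngine() (illustrative sketch; the option structs and
//! error stream are assumed to be provided by the caller):
//!   auto engine = getEngine(modelOpts, buildOpts, sysOpts, std::cerr);
//!   if (!engine) { /* creation failed; details were written to std::cerr */ }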
//!
//! \brief Create a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* networkToSerialized(const BuildOptions& build,
const SystemOptions& sys, IBuilder& builder,
INetworkDefinition& network,
std::ostream& err);
//!
//! \brief Transfer a model to a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* modelToSerialized(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
//!
//! \brief Serialize network and save it into a file
//!
//! \return boolean Return true if the network was successfully serialized and
//! saved
//!
bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine,
bool multiThreading);
//!
//! \brief Set tensor scales from a calibration table
//!
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
const std::vector<IOFormat>& inputFormats,
const std::vector<IOFormat>& outputFormats,
const std::string& calibrationFile);
//!
//! \brief Check if safe runtime is loaded.
//!
bool hasSafeRuntime();
//!
//! \brief Create a safe runtime object if the dynamic library is loaded.
//!
nvinfer1::safe::IRuntime*
createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
//!
//! \brief Check if consistency checker is loaded.
//!
bool hasConsistencyChecker();
//!
//! \brief Create a consistency checker object if the dynamic library is loaded.
//!
nvinfer1::consistency::IConsistencyChecker*
createConsistencyChecker(nvinfer1::ILogger& logger,
IHostMemory const* engine) noexcept;
//!
//! \brief Run consistency check on serialized engine.
//!
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
} // namespace sample
#endif // TRT_SAMPLE_ENGINES_H
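For reference, the helpers declared above wrap the standard TensorRT ONNX build flow. A minimal standalone sketch of that flow (hypothetical function name; buildSerializedNetwork is the TensorRT 8 builder API):
#include <memory>
#include <vector>
#include "NvInfer.h"
#include "NvOnnxParser.h"
std::vector<char> buildSerializedEngine(nvinfer1::ILogger& logger,
                                        const char* onnxPath) {
  using namespace nvinfer1;
  std::unique_ptr<IBuilder> builder{createInferBuilder(logger)};
  const auto flags = 1U << static_cast<uint32_t>(
      NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
  std::unique_ptr<INetworkDefinition> network{builder->createNetworkV2(flags)};
  std::unique_ptr<nvonnxparser::IParser> parser{
      nvonnxparser::createParser(*network, logger)};
  if (!parser->parseFromFile(
          onnxPath, static_cast<int32_t>(ILogger::Severity::kWARNING))) {
    return {};  // parse errors were reported through the logger
  }
  std::unique_ptr<IBuilderConfig> config{builder->createBuilderConfig()};
  std::unique_ptr<IHostMemory> blob{
      builder->buildSerializedNetwork(*network, *config)};
  if (!blob) {
    return {};
  }
  auto* data = static_cast<char*>(blob->data());
  return {data, data + blob->size()};  // engine blob, ready to save or load
}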


@@ -1,943 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <array>
#include <chrono>
#include <cuda_profiler_api.h>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <thread>
#include <utility>
#include <vector>
#if defined(__QNX__)
#include <sys/neutrino.h>
#include <sys/syspage.h>
#endif
#include "NvInfer.h"
#include "ErrorRecorder.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleEngines.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
#include "sampleUtils.h"
namespace sample {
template <class MapType, class EngineType>
bool validateTensorNames(const MapType& map, const EngineType* engine,
const int32_t endBindingIndex) {
// Check that the provided input tensor names match input tensors of the
// engine. Report an error and fail if a provided name cannot be found,
// because it implies a potential typo.
for (const auto& item : map) {
bool tensorNameFound{false};
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (engine->bindingIsInput(b) &&
engine->getBindingName(b) == item.first) {
tensorNameFound = true;
break;
}
}
if (!tensorNameFound) {
sample::gLogError
<< "Cannot find input tensor with name \"" << item.first
<< "\" in the engine bindings! "
<< "Please make sure the input tensor names are correct."
<< std::endl;
return false;
}
}
return true;
}
template <class EngineType, class ContextType> class FillBindingClosure {
private:
using InputsMap = std::unordered_map<std::string, std::string>;
using BindingsVector = std::vector<std::unique_ptr<Bindings>>;
EngineType const* engine;
ContextType const* context;
InputsMap const& inputs;
BindingsVector& bindings;
int32_t batch;
int32_t endBindingIndex;
void fillOneBinding(int32_t bindingIndex, int64_t vol) {
auto const dims = getDims(bindingIndex);
auto const name = engine->getBindingName(bindingIndex);
auto const isInput = engine->bindingIsInput(bindingIndex);
auto const dataType = engine->getBindingDataType(bindingIndex);
auto const* bindingInOutStr = isInput ? "input" : "output";
for (auto& binding : bindings) {
const auto input = inputs.find(name);
if (isInput && input != inputs.end()) {
sample::gLogInfo << "Using values loaded from " << input->second
<< " for input " << name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType,
input->second);
} else {
sample::gLogInfo << "Using random values for " << bindingInOutStr << " "
<< name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType);
}
sample::gLogInfo << "Created " << bindingInOutStr << " binding for "
<< name << " with dimensions " << dims << std::endl;
}
}
bool fillAllBindings(int32_t batch, int32_t endBindingIndex) {
if (!validateTensorNames(inputs, engine, endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --loadInputs flag."
<< std::endl;
return false;
}
for (int32_t b = 0; b < endBindingIndex; b++) {
auto const dims = getDims(b);
auto const comps = engine->getBindingComponentsPerElement(b);
auto const strides = context->getStrides(b);
int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b);
auto const vol = volume(dims, strides, vectorDimIndex, comps, batch);
fillOneBinding(b, vol);
}
return true;
}
Dims getDims(int32_t bindingIndex);
public:
FillBindingClosure(EngineType const* _engine, ContextType const* _context,
InputsMap const& _inputs, BindingsVector& _bindings,
int32_t _batch, int32_t _endBindingIndex)
: engine(_engine), context(_context), inputs(_inputs),
bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {}
bool operator()() { return fillAllBindings(batch, endBindingIndex); }
};
template <>
Dims FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>::
getDims(int32_t bindingIndex) {
return context->getBindingDimensions(bindingIndex);
}
template <>
Dims FillBindingClosure<
nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) {
return engine->getBindingDimensions(bindingIndex);
}
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference) {
int32_t device{};
cudaCheck(cudaGetDevice(&device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// Use managed memory on integrated devices when transfers are skipped
// and when it is explicitly requested on the commandline.
bool useManagedMemory{(inference.skipTransfers && properties.integrated) ||
inference.useManaged};
using FillSafeBindings =
FillBindingClosure<nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
auto* safeEngine = iEnv.safeEngine.get();
for (int32_t s = 0; s < inference.streams; ++s) {
iEnv.safeContext.emplace_back(safeEngine->createExecutionContext());
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
const int32_t nBindings = safeEngine->getNbBindings();
auto const* safeContext = iEnv.safeContext.front().get();
// batch is set to 1 because the safe runtime only supports explicit batch.
return FillSafeBindings(iEnv.safeEngine.get(), safeContext,
inference.inputs, iEnv.bindings, 1, nBindings)();
}
using FillStdBindings =
FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>;
for (int32_t s = 0; s < inference.streams; ++s) {
auto ec = iEnv.engine->createExecutionContext();
if (ec == nullptr) {
sample::gLogError << "Unable to create execution context for stream " << s
<< "." << std::endl;
return false;
}
iEnv.context.emplace_back(ec);
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
if (iEnv.profiler) {
iEnv.context.front()->setProfiler(iEnv.profiler.get());
// Always run reportToProfiler() after enqueue launch
iEnv.context.front()->setEnqueueEmitsProfile(false);
}
const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles();
const int32_t nBindings = iEnv.engine->getNbBindings();
const int32_t bindingsInProfile =
nOptProfiles > 0 ? nBindings / nOptProfiles : 0;
const int32_t endBindingIndex =
bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings();
if (nOptProfiles > 1) {
sample::gLogWarning << "Multiple profiles are currently not supported. "
"Running with one profile."
<< std::endl;
}
// Make sure that the tensor names provided in command-line args actually
// exist in the engine bindings to avoid silent typos.
if (!validateTensorNames(inference.shapes, iEnv.engine.get(),
endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --shapes flag."
<< std::endl;
return false;
}
// Set all input dimensions before all bindings can be allocated
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (iEnv.engine->bindingIsInput(b)) {
auto dims = iEnv.context.front()->getBindingDimensions(b);
const bool isScalar = dims.nbDims == 0;
const bool isDynamicInput =
std::any_of(dims.d, dims.d + dims.nbDims,
[](int32_t dim) { return dim == -1; }) ||
iEnv.engine->isShapeBinding(b);
if (isDynamicInput) {
auto shape = inference.shapes.find(iEnv.engine->getBindingName(b));
std::vector<int32_t> staticDims;
if (shape == inference.shapes.end()) {
// If no shape is provided, set dynamic dimensions to 1.
constexpr int32_t DEFAULT_DIMENSION = 1;
if (iEnv.engine->isShapeBinding(b)) {
if (isScalar) {
staticDims.push_back(1);
} else {
staticDims.resize(dims.d[0]);
std::fill(staticDims.begin(), staticDims.end(),
DEFAULT_DIMENSION);
}
} else {
staticDims.resize(dims.nbDims);
std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
[&](int32_t dimension) {
return dimension >= 0 ? dimension
: DEFAULT_DIMENSION;
});
}
sample::gLogWarning << "Dynamic dimensions required for input: "
<< iEnv.engine->getBindingName(b)
<< ", but no shapes were provided. Automatically "
"overriding shape to: "
<< staticDims << std::endl;
} else if (inference.inputs.count(shape->first) &&
iEnv.engine->isShapeBinding(b)) {
if (isScalar || dims.nbDims == 1) {
// Load shape tensor from file.
size_t const size = isScalar ? 1 : dims.d[0];
staticDims.resize(size);
auto const& filename = inference.inputs.at(shape->first);
auto dst = reinterpret_cast<char*>(staticDims.data());
loadFromFile(filename, dst,
size * sizeof(decltype(staticDims)::value_type));
} else {
sample::gLogWarning << "Cannot load shape tensor " << shape->first
<< " from file, "
<< "ND-Shape isn't supported yet" << std::endl;
// Fallback
staticDims = shape->second;
}
} else {
staticDims = shape->second;
}
for (auto& c : iEnv.context) {
if (iEnv.engine->isShapeBinding(b)) {
if (!c->setInputShapeBinding(b, staticDims.data())) {
return false;
}
} else {
if (!c->setBindingDimensions(b, toDims(staticDims))) {
return false;
}
}
}
}
}
}
auto* engine = iEnv.engine.get();
auto const* context = iEnv.context.front().get();
int32_t const batch =
engine->hasImplicitBatchDimension() ? inference.batch : 1;
return FillStdBindings(engine, context, inference.inputs, iEnv.bindings,
batch, endBindingIndex)();
}
namespace {
#if defined(__QNX__)
using TimePoint = double;
#else
using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
#endif
TimePoint getCurrentTime() {
#if defined(__QNX__)
uint64_t const currentCycles = ClockCycles();
uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
// Return current timestamp in ms.
return static_cast<TimePoint>(currentCycles) * 1000. / cyclesPerSecond;
#else
return std::chrono::high_resolution_clock::now();
#endif
}
//!
//! \struct SyncStruct
//! \brief Threads synchronization structure
//!
struct SyncStruct {
std::mutex mutex;
TrtCudaStream mainStream;
TrtCudaEvent gpuStart{cudaEventBlockingSync};
TimePoint cpuStart{};
float sleep{};
};
struct Enqueue {
explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
: mContext(context), mBuffers(buffers) {}
nvinfer1::IExecutionContext& mContext;
void** mBuffers{};
};
//!
//! \class EnqueueImplicit
//! \brief Functor to enqueue inference with implicit batch
//!
class EnqueueImplicit : private Enqueue {
public:
explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
int32_t batch)
: Enqueue(context, buffers), mBatch(batch) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueue()"
<< std::endl;
}
return true;
}
return false;
}
private:
int32_t mBatch;
};
//!
//! \class EnqueueExplicit
//! \brief Functor to enqueue inference with explicit batch
//!
class EnqueueExplicit : private Enqueue {
public:
explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
: Enqueue(context, buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueueV2()"
<< std::endl;
}
return true;
}
return false;
}
};
//!
//! \class EnqueueGraph
//! \brief Functor to enqueue inference from CUDA Graph
//!
class EnqueueGraph {
public:
explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
TrtCudaGraph& graph)
: mGraph(graph), mContext(context) {}
bool operator()(TrtCudaStream& stream) const {
if (mGraph.launch(stream)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.reportToProfiler()) {
gLogWarning << "Failed to collect layer timing info from previous CUDA "
"graph launch"
<< std::endl;
}
return true;
}
return false;
}
TrtCudaGraph& mGraph;
nvinfer1::IExecutionContext& mContext;
};
//!
//! \class EnqueueSafe
//! \brief Functor to enqueue safe execution context
//!
class EnqueueSafe {
public:
explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
void** buffers)
: mContext(context), mBuffers(buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
return true;
}
return false;
}
nvinfer1::safe::IExecutionContext& mContext;
void** mBuffers{};
};
using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
enum class StreamType : int32_t {
kINPUT = 0,
kCOMPUTE = 1,
kOUTPUT = 2,
kNUM = 3
};
enum class EventType : int32_t {
kINPUT_S = 0,
kINPUT_E = 1,
kCOMPUTE_S = 2,
kCOMPUTE_E = 3,
kOUTPUT_S = 4,
kOUTPUT_E = 5,
kNUM = 6
};
using MultiStream =
std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
static_cast<int32_t>(EventType::kNUM)>;
using EnqueueTimes = std::array<TimePoint, 2>;
//!
//! \class Iteration
//! \brief Inference iteration and streams management
//!
template <class ContextType> class Iteration {
public:
Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
Bindings& bindings)
: mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
mContext(&context) {
for (int32_t d = 0; d < mDepth; ++d) {
for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
}
}
createEnqueueFunction(inference, context, bindings);
}
bool query(bool skipTransfers) {
if (mActive[mNext]) {
return true;
}
if (!skipTransfers) {
record(EventType::kINPUT_S, StreamType::kINPUT);
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
record(EventType::kINPUT_E, StreamType::kINPUT);
wait(EventType::kINPUT_E,
StreamType::kCOMPUTE); // Wait for input DMA before compute
}
record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
recordEnqueueTime();
if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
return false;
}
recordEnqueueTime();
record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
if (!skipTransfers) {
wait(EventType::kCOMPUTE_E,
StreamType::kOUTPUT); // Wait for compute before output DMA
record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
}
mActive[mNext] = true;
moveNext();
return true;
}
float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
if (mActive[mNext]) {
if (skipTransfers) {
getEvent(EventType::kCOMPUTE_E).synchronize();
} else {
getEvent(EventType::kOUTPUT_E).synchronize();
}
trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
mActive[mNext] = false;
return getEvent(EventType::kCOMPUTE_S) - gpuStart;
}
return 0;
}
void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
for (int32_t d = 0; d < mDepth; ++d) {
sync(cpuStart, gpuStart, trace, skipTransfers);
moveNext();
}
}
void wait(TrtCudaEvent& gpuStart) {
getStream(StreamType::kINPUT).wait(gpuStart);
}
void setInputData() {
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
}
void fetchOutputData() {
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
}
private:
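// With the default depth of 2, mNext ping-pongs between the two in-flight
// iterations (mDepth - 1 - mNext toggles 0 and 1); with depth 1 it stays 0.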
void moveNext() { mNext = mDepth - 1 - mNext; }
TrtCudaStream& getStream(StreamType t) {
return mStream[static_cast<int32_t>(t)];
}
TrtCudaEvent& getEvent(EventType t) {
return *mEvents[mNext][static_cast<int32_t>(t)];
}
void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }
void recordEnqueueTime() {
mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
enqueueStart = 1 - enqueueStart;
}
TimePoint getEnqueueTime(bool start) {
return mEnqueueTimes[mNext][start ? 0 : 1];
}
void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }
InferenceTrace getTrace(const TimePoint& cpuStart,
const TrtCudaEvent& gpuStart, bool skipTransfers) {
float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_S) - gpuStart;
float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_E) - gpuStart;
float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_S) - gpuStart;
float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_E) - gpuStart;
return InferenceTrace(mStreamId,
std::chrono::duration<float, std::milli>(
getEnqueueTime(true) - cpuStart)
.count(),
std::chrono::duration<float, std::milli>(
getEnqueueTime(false) - cpuStart)
.count(),
is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
}
void createEnqueueFunction(const InferenceOptions& inference,
nvinfer1::IExecutionContext& context,
Bindings& bindings) {
if (inference.batch) {
mEnqueue = EnqueueFunction(EnqueueImplicit(
context, mBindings.getDeviceBuffers(), inference.batch));
} else {
mEnqueue = EnqueueFunction(
EnqueueExplicit(context, mBindings.getDeviceBuffers()));
}
if (inference.graph) {
TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
// Avoid capturing initialization calls by executing the enqueue function
// at least once before starting CUDA graph capture.
const auto ret = mEnqueue(stream);
assert(ret);
stream.synchronize();
mGraph.beginCapture(stream);
// The built TRT engine may contain operations that are not permitted
// under CUDA graph capture mode.
// When the stream is capturing, the enqueue call may return false if the
// current CUDA graph capture fails.
if (mEnqueue(stream)) {
mGraph.endCapture(stream);
mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
} else {
mGraph.endCaptureOnError(stream);
// Ensure any CUDA error has been cleaned up.
cudaCheck(cudaGetLastError());
sample::gLogWarning << "The built TensorRT engine contains operations "
"that are not permitted under "
"CUDA graph capture mode."
<< std::endl;
sample::gLogWarning << "The specified --useCudaGraph flag has been "
"ignored. The inference will be "
"launched without using CUDA graph launch."
<< std::endl;
}
}
}
void createEnqueueFunction(const InferenceOptions&,
nvinfer1::safe::IExecutionContext& context,
Bindings&) {
mEnqueue =
EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
}
Bindings& mBindings;
TrtCudaGraph mGraph;
EnqueueFunction mEnqueue;
int32_t mStreamId{0};
int32_t mNext{0};
int32_t mDepth{2}; // default to double buffer to hide DMA transfers
std::vector<bool> mActive;
MultiStream mStream;
std::vector<MultiEvent> mEvents;
int32_t enqueueStart{0};
std::vector<EnqueueTimes> mEnqueueTimes;
ContextType* mContext{nullptr};
};
template <class ContextType>
bool inferenceLoop(
std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
bool skipTransfers, float idleMs) {
float durationMs = 0;
int32_t skip = 0;
for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
++i) {
for (auto& s : iStreams) {
if (!s->query(skipTransfers)) {
return false;
}
}
for (auto& s : iStreams) {
durationMs = std::max(durationMs,
s->sync(cpuStart, gpuStart, trace, skipTransfers));
}
if (durationMs < warmupMs) // Warming up
{
if (durationMs) // Don't count iterations that completed during warmup
{
++skip;
}
continue;
}
if (idleMs != 0.F) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(idleMs));
}
}
for (auto& s : iStreams) {
s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
}
return true;
}
template <class ContextType>
void inferenceExecution(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
const int32_t threadIdx, const int32_t streamsPerThread,
int32_t device, std::vector<InferenceTrace>& trace) {
float warmupMs = inference.warmup;
float durationMs = inference.duration * 1000.F + warmupMs;
cudaCheck(cudaSetDevice(device));
std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;
for (int32_t s = 0; s < streamsPerThread; ++s) {
const int32_t streamId{threadIdx * streamsPerThread + s};
auto* iteration = new Iteration<ContextType>(
streamId, inference, *iEnv.template getContext<ContextType>(streamId),
*iEnv.bindings[streamId]);
if (inference.skipTransfers) {
iteration->setInputData();
}
iStreams.emplace_back(iteration);
}
for (auto& s : iStreams) {
s->wait(sync.gpuStart);
}
std::vector<InferenceTrace> localTrace;
if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
inference.iterations, durationMs, warmupMs, localTrace,
inference.skipTransfers, inference.idle)) {
iEnv.error = true;
}
if (inference.skipTransfers) {
for (auto& s : iStreams) {
s->fetchOutputData();
}
}
sync.mutex.lock();
trace.insert(trace.end(), localTrace.begin(), localTrace.end());
sync.mutex.unlock();
}
inline std::thread makeThread(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
int32_t threadIdx, int32_t streamsPerThread,
int32_t device,
std::vector<InferenceTrace>& trace) {
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
} // namespace
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace) {
cudaCheck(cudaProfilerStart());
trace.resize(0);
SyncStruct sync;
sync.sleep = inference.sleep;
sync.mainStream.sleep(&sync.sleep);
sync.cpuStart = getCurrentTime();
sync.gpuStart.record(sync.mainStream);
// When multiple streams are used, trtexec can run inference in two modes:
// (1) if inference.threads is true, then run each stream on each thread.
// (2) if inference.threads is false, then run all streams on the same thread.
const int32_t numThreads = inference.threads ? inference.streams : 1;
const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;
std::vector<std::thread> threads;
for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
streamsPerThread, device, trace));
}
for (auto& th : threads) {
th.join();
}
cudaCheck(cudaProfilerStop());
auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
return a.h2dStart < b.h2dStart;
};
std::sort(trace.begin(), trace.end(), cmpTrace);
return !iEnv.error;
}
namespace {
size_t reportGpuMemory() {
static size_t prevFree{0};
size_t free{0};
size_t total{0};
size_t newlyAllocated{0};
cudaCheck(cudaMemGetInfo(&free, &total));
sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
if (prevFree != 0) {
newlyAllocated = (prevFree - free);
sample::gLogInfo << ", newly allocated GPU memory = "
<< newlyAllocated / 1024.0_MiB << " GiB";
}
sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
<< std::endl;
prevFree = free;
return newlyAllocated;
}
} // namespace
//! Returns true if deserialization is slower than expected or fails.
bool timeDeserialize(InferenceEnvironment& iEnv) {
constexpr int32_t kNB_ITERS{20};
std::unique_ptr<IRuntime> rt{
createInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<ICudaEngine> engine;
std::unique_ptr<safe::IRuntime> safeRT{
sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<safe::ICudaEngine> safeEngine;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
safeRT->setErrorRecorder(&gRecorder);
}
auto timeDeserializeFn = [&]() -> float {
bool deserializeOK{false};
engine.reset(nullptr);
safeEngine.reset(nullptr);
auto startClock = std::chrono::high_resolution_clock::now();
if (iEnv.safe) {
safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size()));
deserializeOK = (safeEngine != nullptr);
} else {
engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size(), nullptr));
deserializeOK = (engine != nullptr);
}
auto endClock = std::chrono::high_resolution_clock::now();
// return NAN if deserialization failed.
return deserializeOK
? std::chrono::duration<float, std::milli>(endClock - startClock)
.count()
: NAN;
};
// Warm up the caches to make sure that cache thrashing isn't throwing off
// the results.
{
sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
for (int32_t i = 0, e = 2; i < e; ++i) {
timeDeserializeFn();
}
}
sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
float const first = timeDeserializeFn();
// Check if the first deserialization succeeded.
if (std::isnan(first)) {
sample::gLogError << "Engine deserialization failed." << std::endl;
return true;
}
sample::gLogInfo << "First deserialization time = " << first
<< " milliseconds" << std::endl;
// Record initial gpu memory state.
reportGpuMemory();
float totalTime{0.F};
for (int32_t i = 0; i < kNB_ITERS; ++i) {
totalTime += timeDeserializeFn();
}
const auto averageTime = totalTime / kNB_ITERS;
// reportGpuMemory sometimes reports zero after a single deserialization of a
// small engine, so use the memory newly allocated across all the iterations.
const auto totalEngineSizeGpu = reportGpuMemory();
sample::gLogInfo << "Total deserialization time = " << totalTime
<< " milliseconds in " << kNB_ITERS
<< " iterations, average time = " << averageTime
<< " milliseconds, first time = " << first
<< " milliseconds." << std::endl;
sample::gLogInfo << "Deserialization Bandwidth = "
<< 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
<< std::endl;
// If the first deserialization is more than tolerance slower than
// the average deserialization, return true, which means an error occurred.
// The tolerance is set to 2x since the deserialization time is quick and
// susceptible to caching issues causing problems in the first timing.
const auto tolerance = 2.0F;
const bool isSlowerThanExpected = first > averageTime * tolerance;
if (isSlowerThanExpected) {
sample::gLogInfo << "First deserialization time divided by average time is "
<< (first / averageTime) << ". Exceeds tolerance of "
<< tolerance << "x." << std::endl;
}
return isSlowerThanExpected;
}
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format) {
auto runtime = std::unique_ptr<IRuntime>(
createInferRuntime(sample::gLogger.getTRTLogger()));
auto inspector =
std::unique_ptr<IEngineInspector>(iEnv.engine->createEngineInspector());
if (!iEnv.context.empty()) {
inspector->setExecutionContext(iEnv.context.front().get());
}
std::string result = inspector->getEngineInformation(format);
return result;
}
} // namespace sample
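The CUDA-graph path in createEnqueueFunction above follows the usual warm-up/capture/instantiate/launch pattern. A minimal sketch of that pattern against the raw CUDA runtime (illustrative; `enqueue` stands in for any sequence of async launches on the stream):
#include <cuda_runtime_api.h>
bool captureAndLaunch(cudaStream_t stream, void (*enqueue)(cudaStream_t)) {
  enqueue(stream);                 // warm-up run keeps init work out of capture
  cudaStreamSynchronize(stream);
  cudaGraph_t graph{};
  if (cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal) !=
      cudaSuccess) {
    return false;
  }
  enqueue(stream);                 // recorded into the graph, not executed
  if (cudaStreamEndCapture(stream, &graph) != cudaSuccess) {
    cudaGetLastError();            // clear any sticky capture error
    return false;
  }
  cudaGraphExec_t graphExec{};
  if (cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0) !=
      cudaSuccess) {
    cudaGraphDestroy(graph);
    return false;
  }
  cudaGraphLaunch(graphExec, stream);  // replays the whole recorded sequence
  cudaStreamSynchronize(stream);
  cudaGraphExecDestroy(graphExec);
  cudaGraphDestroy(graph);
  return true;
}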


@@ -1,88 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_INFERENCE_H
#define TRT_SAMPLE_INFERENCE_H
#include "sampleReporting.h"
#include "sampleUtils.h"
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "NvInferSafeRuntime.h"
namespace sample {
struct InferenceEnvironment {
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<Profiler> profiler;
std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
std::vector<std::unique_ptr<Bindings>> bindings;
bool error{false};
std::vector<uint8_t> engineBlob;
bool safe{false};
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;
template <class ContextType>
inline ContextType* getContext(int32_t streamIdx);
};
template <>
inline nvinfer1::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return context[streamIdx].get();
}
template <>
inline nvinfer1::safe::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return safeContext[streamIdx].get();
}
//!
//! \brief Set up contexts and bindings for inference
//!
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference);
//!
//! \brief Deserialize the engine and time how long it takes.
//!
bool timeDeserialize(InferenceEnvironment& iEnv);
//!
//! \brief Run inference and collect timing, return false if any error hit
//! during inference
//!
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace);
//!
//! \brief Get layer information of the engine.
//!
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format);
} // namespace sample
#endif // TRT_SAMPLE_INFERENCE_H
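A typical call sequence for the interfaces above (a sketch assuming the options and engine come from sampleOptions/sampleEngines, which must be included):
sample::InferenceEnvironment iEnv;
iEnv.engine = sample::getEngine(modelOpts, buildOpts, sysOpts, std::cerr);
sample::InferenceOptions inferOpts;  // defaults: 1 stream, 10 iterations, ...
std::vector<sample::InferenceTrace> trace;
if (iEnv.engine && sample::setUpInference(iEnv, inferOpts) &&
    sample::runInference(inferOpts, iEnv, /*device=*/0, trace)) {
  // trace now holds per-query timestamps, sorted by H2D start time.
}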

File diff suppressed because it is too large.


@@ -1,311 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_OPTIONS_H
#define TRT_SAMPLE_OPTIONS_H
#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "NvInfer.h"
namespace sample {
// Build default params
constexpr int32_t maxBatchNotProvided{0};
constexpr int32_t defaultMinTiming{1};
constexpr int32_t defaultAvgTiming{8};
// System default params
constexpr int32_t defaultDevice{0};
// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};
// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr float defaultPercentile{99};
enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };
enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };
using Arguments = std::unordered_multimap<std::string, std::string>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
using ShapeRange =
std::array<std::vector<int32_t>,
nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes =
std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
struct Options {
virtual void parse(Arguments& arguments) = 0;
};
struct BaseModelOptions : public Options {
ModelFormat format{ModelFormat::kANY};
std::string model;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct UffInput : public Options {
std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
bool NHWC{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ModelOptions : public Options {
BaseModelOptions baseModel;
std::string prototxt;
std::vector<std::string> outputs;
UffInput uffInputs;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct BuildOptions : public Options {
int32_t maxBatch{maxBatchNotProvided};
double workspace{-1.0};
double dlaSRAM{-1.0};
double dlaLocalDRAM{-1.0};
double dlaGlobalDRAM{-1.0};
int32_t minTiming{defaultMinTiming};
int32_t avgTiming{defaultAvgTiming};
bool tf32{true};
bool fp16{false};
bool int8{false};
bool directIO{false};
PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
LayerPrecisions layerPrecisions;
LayerOutputTypes layerOutputTypes;
bool safe{false};
bool consistency{false};
bool restricted{false};
bool save{false};
bool load{false};
bool refittable{false};
SparsityFlag sparsity{SparsityFlag::kDISABLE};
nvinfer1::ProfilingVerbosity profilingVerbosity{
nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
std::string engine;
std::string calibration;
std::unordered_map<std::string, ShapeRange> shapes;
std::unordered_map<std::string, ShapeRange> shapesCalib;
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
nvinfer1::TacticSources enabledTactics{0};
nvinfer1::TacticSources disabledTactics{0};
TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
std::string timingCacheFile{};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SystemOptions : public Options {
int32_t device{defaultDevice};
int32_t DLACore{-1};
bool fallback{false};
std::vector<std::string> plugins;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct InferenceOptions : public Options {
int32_t batch{batchNotProvided};
int32_t iterations{defaultIterations};
int32_t streams{defaultStreams};
float warmup{defaultWarmUp};
float duration{defaultDuration};
float sleep{defaultSleep};
float idle{defaultIdle};
bool overlap{true};
bool skipTransfers{false};
bool useManaged{false};
bool spin{false};
bool threads{false};
bool graph{false};
bool skip{false};
bool rerun{false};
bool timeDeserialize{false};
bool timeRefit{false};
std::unordered_map<std::string, std::string> inputs;
std::unordered_map<std::string, std::vector<int32_t>> shapes;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ReportingOptions : public Options {
bool verbose{false};
int32_t avgs{defaultAvgRuns};
float percentile{defaultPercentile};
bool refit{false};
bool output{false};
bool profile{false};
bool layerInfo{false};
std::string exportTimes;
std::string exportOutput;
std::string exportProfile;
std::string exportLayerInfo;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SafeBuilderOptions : public Options {
std::string serialized{};
std::string onnxModelFile{};
bool help{false};
bool verbose{false};
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
bool int8{false};
std::string calibFile{};
std::vector<std::string> plugins;
bool consistency{false};
bool standard{false};
void parse(Arguments& arguments) override;
static void printHelp(std::ostream& out);
};
struct AllOptions : public Options {
ModelOptions model;
BuildOptions build;
SystemOptions system;
InferenceOptions inference;
ReportingOptions reporting;
bool helps{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
bool parseHelp(Arguments& arguments);
void helpHelp(std::ostream& out);
// Functions to print options
std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
std::ostream& operator<<(std::ostream& os, const UffInput& input);
std::ostream& operator<<(std::ostream& os, const IOFormat& format);
std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
std::ostream& operator<<(std::ostream& os, const AllOptions& options);
std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const nvinfer1::WeightsRole role) {
switch (role) {
case nvinfer1::WeightsRole::kKERNEL: {
os << "Kernel";
break;
}
case nvinfer1::WeightsRole::kBIAS: {
os << "Bias";
break;
}
case nvinfer1::WeightsRole::kSHIFT: {
os << "Shift";
break;
}
case nvinfer1::WeightsRole::kSCALE: {
os << "Scale";
break;
}
case nvinfer1::WeightsRole::kCONSTANT: {
os << "Constant";
break;
}
case nvinfer1::WeightsRole::kANY: {
os << "Any";
break;
}
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const std::vector<int32_t>& vec) {
for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i) {
os << (i ? "x" : "") << vec[i];
}
return os;
}
} // namespace sample
#endif // TRT_SAMPLE_OPTIONS_H
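A minimal driver sketch for the parsing entry points above (illustrative; mirrors how trtexec-style tools consume these structs):
#include <iostream>
#include "sampleOptions.h"
int main(int argc, char* argv[]) {
  sample::Arguments args = sample::argsToArgumentsMap(argc, argv);
  if (sample::parseHelp(args)) {
    sample::AllOptions::help(std::cout);
    return 0;
  }
  sample::AllOptions options;
  options.parse(args);  // consumes recognized flags from the multimap
  std::cout << options << std::endl;  // echo the parsed configuration
  return 0;
}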


@@ -1,480 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <utility>
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
using namespace nvinfer1;
namespace sample {
namespace {
//!
//! \brief Find percentile in an ascending sequence of timings
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
//!
template <typename T>
float findPercentile(float percentile,
std::vector<InferenceTime> const& timings,
T const& toFloat) {
int32_t const all = static_cast<int32_t>(timings.size());
int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
if (percentile < 0.0f || percentile > 100.0f) {
throw std::runtime_error("percentile is not in [0, 100]!");
}
return toFloat(timings[std::max(all - 1 - exclude, 0)]);
}
//!
//! \brief Find median in a sorted sequence of timings
//!
template <typename T>
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat) {
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
int32_t const m = timings.size() / 2;
if (timings.size() % 2) {
return toFloat(timings[m]);
}
return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
}
//!
//! \brief Find the coefficient of variation (std / mean, as a percentage) in
//! a sorted sequence of timings given the mean
//!
template <typename T>
float findCoeffOfVariance(std::vector<InferenceTime> const& timings,
T const& toFloat, float mean) {
if (timings.empty()) {
return 0;
}
if (mean == 0.F) {
return std::numeric_limits<float>::infinity();
}
auto const metricAccumulator = [toFloat, mean](float acc,
InferenceTime const& a) {
float const diff = toFloat(a) - mean;
return acc + diff * diff;
};
float const variance =
std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) /
timings.size();
return std::sqrt(variance) / mean * 100.F;
}
inline InferenceTime traceToTiming(const InferenceTrace& a) {
return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart),
(a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart),
(a.d2hEnd - a.h2dStart));
}
} // namespace
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float benchTimeMs, std::ostream& os) {
os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms"
<< std::endl;
os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000
<< " s" << std::endl;
}
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os) {
int32_t count = 0;
InferenceTime sum;
os << std::endl;
os << "=== Trace details ===" << std::endl;
os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
for (auto const& t : timings) {
sum += t;
if (++count == runsPerAvg) {
// clang-format off
os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
<< " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg
<< " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl;
// clang-format on
count = 0;
sum.enq = 0;
sum.h2d = 0;
sum.compute = 0;
sum.d2h = 0;
sum.e2e = 0;
}
}
}
void printMetricExplanations(std::ostream& os) {
os << std::endl;
os << "=== Explanations of the performance metrics ===" << std::endl;
os << "Total Host Walltime: the host walltime from when the first query "
"(after warmups) is enqueued to when the "
"last query is completed."
<< std::endl;
os << "GPU Compute Time: the GPU latency to execute the kernels for a query."
<< std::endl;
os << "Total GPU Compute Time: the summation of the GPU Compute Time of all "
"the queries. If this is significantly "
"shorter than Total Host Walltime, the GPU may be under-utilized "
"because of host-side overheads or data "
"transfers."
<< std::endl;
os << "Throughput: the observed throughput computed by dividing the number "
"of queries by the Total Host Walltime. "
"If this is significantly lower than the reciprocal of GPU Compute "
"Time, the GPU may be under-utilized "
"because of host-side overheads or data transfers."
<< std::endl;
os << "Enqueue Time: the host latency to enqueue a query. If this is longer "
"than GPU Compute Time, the GPU may be "
"under-utilized."
<< std::endl;
os << "H2D Latency: the latency for host-to-device data transfers for input "
"tensors of a single query."
<< std::endl;
os << "D2H Latency: the latency for device-to-host data transfers for output "
"tensors of a single query."
<< std::endl;
os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H "
"Latency. This is the latency to infer a "
"single query."
<< std::endl;
os << "End-to-End Host Latency: the duration from when the H2D of a query is "
"called to when the D2H of the same "
"query is completed, which includes the latency to wait for the "
"completion of the previous query. This is "
"the latency of a query if multiple queries are enqueued consecutively."
<< std::endl;
}
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile) {
auto const metricComparator = [metricGetter](InferenceTime const& a,
InferenceTime const& b) {
return metricGetter(a) < metricGetter(b);
};
auto const metricAccumulator = [metricGetter](float acc,
InferenceTime const& a) {
return acc + metricGetter(a);
};
std::vector<InferenceTime> newTimings = timings;
std::sort(newTimings.begin(), newTimings.end(), metricComparator);
PerformanceResult result;
result.min = metricGetter(newTimings.front());
result.max = metricGetter(newTimings.back());
result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f,
metricAccumulator) /
newTimings.size();
result.median = findMedian(newTimings, metricGetter);
result.percentile = findPercentile(percentile, newTimings, metricGetter);
result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
return result;
}
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
float percentile, int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
float const throughput = batchSize * timings.size() / walltimeMs * 1000;
auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
auto const latencyResult =
getPerformanceResult(timings, getLatency, percentile);
auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; };
auto const e2eLatencyResult =
getPerformanceResult(timings, getEndToEnd, percentile);
auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
auto const enqueueResult =
getPerformanceResult(timings, getEnqueue, percentile);
auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
auto const h2dResult = getPerformanceResult(timings, getH2d, percentile);
auto const getCompute = [](InferenceTime const& t) { return t.compute; };
auto const gpuComputeResult =
getPerformanceResult(timings, getCompute, percentile);
auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
auto const d2hResult = getPerformanceResult(timings, getD2h, percentile);
auto const toPerfString = [percentile](const PerformanceResult& r) {
std::stringstream s;
s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean
<< " ms, "
<< "median = " << r.median << " ms, percentile(" << percentile
<< "%) = " << r.percentile << " ms";
return s.str();
};
osInfo << std::endl;
osInfo << "=== Performance summary ===" << std::endl;
osInfo << "Throughput: " << throughput << " qps" << std::endl;
osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult)
<< std::endl;
osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
osInfo << "Total GPU Compute Time: "
<< gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
// Report warnings if the throughput is bound by other factors than GPU
// Compute Time.
constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
if (enqueueResult.median >
kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) {
osWarning << "* Throughput may be bound by Enqueue Time rather than GPU "
"Compute and the GPU may be under-utilized."
<< std::endl;
osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs "
"where possible) may increase the "
"throughput."
<< std::endl;
}
if (h2dResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by host-to-device transfers for "
"the inputs rather than GPU Compute and "
"the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
if (d2hResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by device-to-host transfers for "
"the outputs rather than GPU Compute "
"and the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
// Report warnings if the GPU Compute Time is unstable.
constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) {
osWarning
<< "* GPU compute time is unstable, with coefficient of variance = "
<< gpuComputeResult.coeffVar << "%." << std::endl;
osWarning << " If not already in use, locking GPU clock frequency or "
"adding --useSpinWait may improve the "
<< "stability." << std::endl;
}
// Explain what the metrics mean.
osInfo << "Explanations of the performance metrics are printed in the "
"verbose logs."
<< std::endl;
printMetricExplanations(osVerbose);
osInfo << std::endl;
}
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
const ReportingOptions& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) {
return a.computeStart >= warmupMs;
};
auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
int32_t const warmups = noWarmup - trace.begin();
float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
// When implicit batch is used, batchSize = options.inference.batch, which is
// parsed from --batch.
// When explicit batch is used, batchSize = options.inference.batch = 0, so
// treat inference with explicit batch as a single query and report the
// throughput accordingly.
batchSize = batchSize ? batchSize : 1;
printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize,
warmupMs, benchTime, osInfo);
std::vector<InferenceTime> timings(trace.size() - warmups);
std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
printTiming(timings, reporting.avgs, osInfo);
printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo,
osWarning, osVerbose);
if (!reporting.exportTimes.empty()) {
exportJSONTrace(trace, reporting.exportTimes);
}
}
//! Printed format:
//! [ value, ...]
//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end
//! h2d" : time, "start compute" : time,
//! "end compute" : time, "start d2h" : time, "end d2h" : time,
//! "h2d" : time, "compute" : time,
//! "d2h" : time, "latency" : time, "end to end" : time }
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName) {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl;
char const* sep = " ";
for (auto const& t : trace) {
InferenceTime const it(traceToTiming(t));
os << sep << "{ ";
sep = ", ";
// clang-format off
os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
<< "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
<< "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
<< "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
<< "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
<< "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep
<< "\"endToEndMs\" : " << it.e2e << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept {
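// mIterator walks mLayers in lockstep with TensorRT's per-layer callbacks.
// Reaching end() means either the very first pass (grow the list) or the
// start of a new pass, detected when the first layer's name repeats.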
if (mIterator == mLayers.end()) {
bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
mUpdatesCount += mLayers.empty() || first;
if (first) {
mIterator = mLayers.begin();
} else {
mLayers.emplace_back();
mLayers.back().name = layerName;
mIterator = mLayers.end() - 1;
}
}
mIterator->timeMs += timeMs;
++mIterator;
}
void Profiler::print(std::ostream& os) const noexcept {
std::string const nameHdr("Layer");
std::string const timeHdr(" Time (ms)");
std::string const avgHdr(" Avg. Time (ms)");
std::string const percentageHdr(" Time %");
float const totalTimeMs = getTotalTime();
auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) {
return a.name.size() < b.name.size();
};
auto const longestName =
std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
auto const nameLength =
std::max(longestName->name.size() + 1, nameHdr.size());
auto const timeLength = timeHdr.size();
auto const avgLength = avgHdr.size();
auto const percentageLength = percentageHdr.size();
os << std::endl
<< "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
<< std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr
<< std::endl;
for (auto const& p : mLayers) {
// clang-format off
os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs
<< std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100
<< std::endl;
}
{
os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
<< totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
// clang-format on
}
os << std::endl;
}
void Profiler::exportJSONProfile(std::string const& fileName) const noexcept {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl
<< " { \"count\" : " << mUpdatesCount << " }" << std::endl;
auto const totalTimeMs = getTotalTime();
for (auto const& l : mLayers) {
// clang-format off
os << ", {" << " \"name\" : \"" << l.name << "\""
", \"timeMs\" : " << l.timeMs
<< ", \"averageMs\" : " << l.timeMs / mUpdatesCount
<< ", \"percentage\" : " << l.timeMs / totalTimeMs * 100
<< " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Input Tensors:" << std::endl;
bindings.dumpInputs(context, os);
}
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Output Tensors:" << std::endl;
bindings.dumpOutputs(context, os);
}
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch) {
std::ofstream os(fileName, std::ofstream::trunc);
std::string sep = " ";
auto const output = bindings.getOutputBindings();
os << "[" << std::endl;
for (auto const& binding : output) {
// clang-format off
os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl;
sep = ", ";
os << " " << sep << "\"dimensions\" : \"";
bindings.dumpBindingDimensions(binding.second, context, os);
os << "\"" << std::endl;
os << " " << sep << "\"values\" : [ ";
bindings.dumpBindingValues(context, binding.second, os, sep, batch);
os << " ]" << std::endl << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
} // namespace sample

View File

@@ -1,211 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H
#include <functional>
#include <iostream>
#include "NvInfer.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime {
InferenceTime(float q, float i, float c, float o, float e)
: enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}
InferenceTime() = default;
InferenceTime(InferenceTime const&) = default;
InferenceTime(InferenceTime&&) = default;
InferenceTime& operator=(InferenceTime const&) = default;
InferenceTime& operator=(InferenceTime&&) = default;
~InferenceTime() = default;
float enq{0}; // Enqueue
float h2d{0}; // Host to Device
float compute{0}; // Compute
float d2h{0}; // Device to Host
float e2e{0}; // end to end
// ideal latency
float latency() const { return h2d + compute + d2h; }
};
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace {
InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
float ce, float os, float oe)
: stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}
InferenceTrace() = default;
InferenceTrace(InferenceTrace const&) = default;
InferenceTrace(InferenceTrace&&) = default;
InferenceTrace& operator=(InferenceTrace const&) = default;
InferenceTrace& operator=(InferenceTrace&&) = default;
~InferenceTrace() = default;
int32_t stream{0};
float enqStart{0};
float enqEnd{0};
float h2dStart{0};
float h2dEnd{0};
float computeStart{0};
float computeEnd{0};
float d2hStart{0};
float d2hEnd{0};
};
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
a.d2h + b.d2h, a.e2e + b.e2e);
}
inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
return a = a + b;
}
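// Illustrative helper (not part of the original header): the two operators
// above are what let reporting code fold per-iteration timings into a total.
inline InferenceTime totalOf(std::vector<InferenceTime> const& timings) {
  InferenceTime total;  // all fields start at 0
  for (auto const& t : timings) {
    total += t;  // operator+= delegates to the field-wise operator+
  }
  return total;  // total.latency() is then the summed h2d + compute + d2h
}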
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult {
float min{0};
float max{0};
float mean{0};
float median{0};
float percentile{0};
float coeffVar{0}; // coefficient of variation
};
//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float walltime, std::ostream& os);
//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os);
//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float percentile,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile);
//!
//! \brief Print the explanations of the performance metrics printed in
//! printEpilog() function.
//!
void printMetricExplanations(std::ostream& os);
//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
ReportingOptions const& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName);
//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch);
//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile {
std::string name;
float timeMs{0};
};
//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in
//! the same order
//!
class Profiler : public nvinfer1::IProfiler {
public:
void reportLayerTime(char const* layerName, float timeMs) noexcept override;
void print(std::ostream& os) const noexcept;
//!
//! \brief Export a profile to JSON file
//!
void exportJSONProfile(std::string const& fileName) const noexcept;
private:
float getTotalTime() const noexcept {
auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
return accumulator + lp.timeMs;
};
    return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
}
std::vector<LayerProfile> mLayers;
std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
int32_t mUpdatesCount{0};
};
} // namespace sample
#endif // TRT_SAMPLE_REPORTING_H
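For reference, a minimal sketch of how this Profiler is typically wired up (assuming an existing nvinfer1::IExecutionContext* named context and a populated bindings array; execution must be synchronous for per-layer times to be reported):

  sample::Profiler profiler;
  context->setProfiler(&profiler);       // TensorRT invokes reportLayerTime()
  for (int i = 0; i < 10; ++i) {
    context->executeV2(bindings);        // synchronous execute
  }
  profiler.print(std::cout);             // formatted per-layer table
  profiler.exportJSONProfile("layer_profile.json");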

View File

@@ -1,494 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <unordered_map>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include "NvInfer.h"
#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"
namespace sample {
inline int dataTypeSize(nvinfer1::DataType dataType) {
switch (dataType) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
template <typename T> inline T roundUp(T m, T n) {
return ((m + n - 1) / n) * n;
}
inline int volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
int vecDim, int comps, int batch) {
int maxNbElems = 1;
for (int i = 0; i < dims.nbDims; ++i) {
// Get effective length of axis.
int d = dims.d[i];
    // If any dimension is 0, this is an empty tensor.
if (d == 0) {
return 0;
}
if (i == vecDim) {
d = samplesCommon::divUp(d, comps);
}
maxNbElems = std::max(maxNbElems, d * strides.d[i]);
}
return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
if (vecDim != -1) {
dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
}
return volume(dims) * std::max(batch, 1);
}
inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
sample::gLogWarning
<< "Vector too long, only first 8 elements are used in dimension."
<< std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
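// Worked example (added for illustration):
//   nvinfer1::Dims d = toDims({1, 3, 224, 224});
//   // d.nbDims == 4 and d.d[0..3] == {1, 3, 224, 224}; a vector longer than
//   // nvinfer1::Dims::MAX_DIMS is truncated with the warning above.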
template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
if (std::is_integral<T>::value) {
std::uniform_int_distribution<int> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
} else {
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
}
// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
H* typedBuffer = static_cast<H*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<H>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <>
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
__half max) {
fillBufferHalf(buffer, volume, min, max);
}
template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator,
std::ostream& os, const Dims& dims, const Dims& strides,
int32_t vectorDim, int32_t spv) {
const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1,
std::multiplies<int64_t>());
const T* typedBuffer = static_cast<const T*>(buffer);
std::string sep;
for (int64_t v = 0; v < volume; ++v) {
int64_t curV = v;
int32_t dataOffset = 0;
for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
int32_t dimVal = curV % dims.d[dimIndex];
if (dimIndex == vectorDim) {
dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
} else {
dataOffset +=
dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
}
curV /= dims.d[dimIndex];
ASSERT(curV >= 0);
}
os << sep << typedBuffer[dataOffset];
sep = separator;
}
}
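// Worked example (added for clarity): with dims {2, 3}, strides {3, 1} and
// vectorDim == -1, the loop visits v = 0..5 and emits typedBuffer[0..5] in
// row-major order; on a vectorized axis, (dimVal / spv) steps in whole
// vectors while (dimVal % spv) indexes the component inside one vector.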
inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
ASSERT(dst);
std::ifstream file(fileName, std::ios::in | std::ios::binary);
if (file.is_open()) {
file.read(dst, size);
file.close();
} else {
std::stringstream msg;
msg << "Cannot open file " << fileName << "!";
throw std::invalid_argument(msg.str());
}
}
struct Binding {
bool isInput{false};
std::unique_ptr<IMirroredBuffer> buffer;
int64_t volume{0};
nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
void fill(const std::string& fileName) {
loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
buffer->getSize());
}
void fill() {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
break;
}
case nvinfer1::DataType::kINT32: {
fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kINT8: {
fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kFLOAT: {
fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
case nvinfer1::DataType::kHALF: {
fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
}
}
void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
int32_t spv, const std::string separator = " ") const {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT32: {
dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT8: {
dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kFLOAT: {
dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kHALF: {
dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
}
}
};
class Bindings {
public:
Bindings() = delete;
explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
nvinfer1::DataType dataType,
const std::string& fileName = "") {
while (mBindings.size() <= static_cast<size_t>(b)) {
mBindings.emplace_back();
mDevicePointers.emplace_back();
}
mNames[name] = b;
if (mBindings[b].buffer == nullptr) {
if (mUseManaged) {
mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
} else {
mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
}
}
mBindings[b].isInput = isInput;
    // Some memory allocators return nullptr when allocating zero bytes, but
    // TensorRT requires a non-null pointer even for empty tensors, so
    // allocate a dummy byte.
if (volume == 0) {
mBindings[b].buffer->allocate(1);
} else {
mBindings[b].buffer->allocate(
static_cast<size_t>(volume) *
static_cast<size_t>(dataTypeSize(dataType)));
}
mBindings[b].volume = volume;
mBindings[b].dataType = dataType;
mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
if (isInput) {
if (fileName.empty()) {
fill(b);
} else {
fill(b, fileName);
}
}
}
void** getDeviceBuffers() { return mDevicePointers.data(); }
void transferInputToDevice(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (mBindings[b.second].isInput) {
mBindings[b.second].buffer->hostToDevice(stream);
}
}
}
void transferOutputToHost(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (!mBindings[b.second].isInput) {
mBindings[b.second].buffer->deviceToHost(stream);
}
}
}
void fill(int binding, const std::string& fileName) {
mBindings[binding].fill(fileName);
}
void fill(int binding) { mBindings[binding].fill(); }
void dumpBindingDimensions(int binding,
const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
const auto dims = context.getBindingDimensions(binding);
// Do not add a newline terminator, because the caller may be outputting a
// JSON string.
os << dims;
}
void dumpBindingValues(const nvinfer1::IExecutionContext& context,
int binding, std::ostream& os,
const std::string& separator = " ",
int32_t batch = 1) const {
Dims dims = context.getBindingDimensions(binding);
Dims strides = context.getStrides(binding);
int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
const int32_t spv =
context.getEngine().getBindingComponentsPerElement(binding);
if (context.getEngine().hasImplicitBatchDimension()) {
auto insertN = [](Dims& d, int32_t bs) {
const int32_t nbDims = d.nbDims;
ASSERT(nbDims < Dims::MAX_DIMS);
std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
d.d[0] = bs;
d.nbDims = nbDims + 1;
};
int32_t batchStride = 0;
for (int32_t i = 0; i < strides.nbDims; ++i) {
if (strides.d[i] * dims.d[i] > batchStride) {
batchStride = strides.d[i] * dims.d[i];
}
}
insertN(dims, batch);
insertN(strides, batchStride);
vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
}
mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
}
void dumpInputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isInput = [](const Binding& b) { return b.isInput; };
dumpBindings(context, isInput, os);
}
void dumpOutputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
dumpBindings(context, isOutput, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto all = [](const Binding& b) { return true; };
dumpBindings(context, all, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
bool (*predicate)(const Binding& b),
std::ostream& os) const {
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
os << n.first << ": (";
dumpBindingDimensions(binding, context, os);
os << ")" << std::endl;
dumpBindingValues(context, binding, os);
os << std::endl;
}
}
}
std::unordered_map<std::string, int> getInputBindings() const {
auto isInput = [](const Binding& b) { return b.isInput; };
return getBindings(isInput);
}
std::unordered_map<std::string, int> getOutputBindings() const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
return getBindings(isOutput);
}
std::unordered_map<std::string, int> getBindings() const {
auto all = [](const Binding& b) { return true; };
return getBindings(all);
}
std::unordered_map<std::string, int>
getBindings(bool (*predicate)(const Binding& b)) const {
std::unordered_map<std::string, int> bindings;
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
bindings.insert(n);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> mNames;
std::vector<Binding> mBindings;
std::vector<void*> mDevicePointers;
bool mUseManaged{false};
};
template <typename T> struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
size_t nbBindings, bool isInput = true) {
bool broadcast = formats.size() == 1;
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
if (!formats.empty() && !validFormatsCount) {
if (isInput) {
throw std::invalid_argument(
"The number of inputIOFormats must match network's inputs or be one "
"for broadcasting.");
} else {
throw std::invalid_argument(
"The number of outputIOFormats must match network's outputs or be "
"one for broadcasting.");
}
}
return broadcast;
}
inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
if (!iFile) {
sample::gLogWarning << "Could not read timing cache from: " << inFileName
<< ". A new timing cache will be generated and written."
<< std::endl;
return std::vector<char>();
}
iFile.seekg(0, std::ifstream::end);
size_t fsize = iFile.tellg();
iFile.seekg(0, std::ifstream::beg);
std::vector<char> content(fsize);
iFile.read(content.data(), fsize);
iFile.close();
sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
<< inFileName << std::endl;
return content;
}
inline void saveTimingCacheFile(const std::string outFileName,
const IHostMemory* blob) {
std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
if (!oFile) {
sample::gLogWarning << "Could not write timing cache to: " << outFileName
<< std::endl;
return;
}
oFile.write((char*)blob->data(), blob->size());
oFile.close();
sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
<< outFileName << std::endl;
}
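// Usage sketch (illustrative, TensorRT 8-style API; "config" is assumed to be
// an existing nvinfer1::IBuilderConfig*):
//   std::vector<char> blob = loadTimingCacheFile("timing.cache");
//   auto* cache = config->createTimingCache(blob.data(), blob.size());
//   config->setTimingCache(*cache, false /*ignoreMismatch*/);
//   // ... build the engine, which populates the cache ...
//   std::unique_ptr<nvinfer1::IHostMemory> data{cache->serialize()};
//   saveTimingCacheFile("timing.cache", data.get());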
inline int32_t getCudaDriverVersion() {
int32_t version{-1};
cudaCheck(cudaDriverGetVersion(&version));
return version;
}
inline int32_t getCudaRuntimeVersion() {
int32_t version{-1};
cudaCheck(cudaRuntimeGetVersion(&version));
return version;
}
} // namespace sample
#endif // TRT_SAMPLE_UTILS_H

View File

@@ -1,568 +0,0 @@
/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
#ifdef REPLACE_GETOPT
int opterr = 1; /* if error message should be printed */
int optind = 1; /* index into parent argv vector */
int optopt = '?'; /* character checked for validity */
#undef optreset /* see getopt.h */
#define optreset __mingw_optreset
int optreset; /* reset getopt */
char* optarg; /* argument associated with option */
#endif
#define PRINT_ERROR ((opterr) && (*options != ':'))
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
/* return values */
#define BADCH (int) '?'
#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
#define INORDER (int) 1
#ifndef __CYGWIN__
#define __progname __argv[0]
#else
extern char __declspec(dllimport) * __progname;
#endif
#ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char* const*, const char*, const struct option*, int*, int);
static int parse_long_options(char* const*, const char*, const struct option*, int*, int);
static int gcd(int, int);
static void permute_args(int, int, int, char* const*);
static char* place = EMSG; /* option letter processing */
/* XXX: set optreset to 1 rather than these two */
static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
static const char recargchar[] = "option requires an argument -- %c";
static const char recargstring[] = "option requires an argument -- %s";
static const char ambig[] = "ambiguous option -- %.*s";
static const char noarg[] = "option doesn't take an argument -- %.*s";
static const char illoptchar[] = "unknown option -- %c";
static const char illoptstring[] = "unknown option -- %s";
static void _vwarnx(const char* fmt, va_list ap)
{
(void) fprintf(stderr, "%s: ", __progname);
if (fmt != NULL)
(void) vfprintf(stderr, fmt, ap);
(void) fprintf(stderr, "\n");
}
static void warnx(const char* fmt, ...)
{
va_list ap;
va_start(ap, fmt);
_vwarnx(fmt, ap);
va_end(ap);
}
/*
* Compute the greatest common divisor of a and b.
*/
static int gcd(int a, int b)
{
int c;
c = a % b;
while (c != 0)
{
a = b;
b = c;
c = a % b;
}
return (b);
}
/*
* Exchange the block from nonopt_start to nonopt_end with the block
* from nonopt_end to opt_end (keeping the same order of arguments
* in each block).
*/
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
{
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
char* swap;
/*
* compute lengths of blocks and number and size of cycles
*/
nnonopts = panonopt_end - panonopt_start;
nopts = opt_end - panonopt_end;
ncycle = gcd(nnonopts, nopts);
cyclelen = (opt_end - panonopt_start) / ncycle;
for (i = 0; i < ncycle; i++)
{
cstart = panonopt_end + i;
pos = cstart;
for (j = 0; j < cyclelen; j++)
{
if (pos >= panonopt_end)
pos -= nnonopts;
else
pos += nopts;
swap = nargv[pos];
/* LINTED const cast */
((char**) nargv)[pos] = nargv[cstart];
/* LINTED const cast */
((char**) nargv)[cstart] = swap;
}
}
}
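/*
 * Worked example (added for clarity, not in the original source): with
 * argv = { prog, file1, file2, -a, -b }, nonopt_start = 1, nonopt_end = 3
 * and opt_end = 5, we get nnonopts = 2, nopts = 2, ncycle = gcd(2, 2) = 2
 * and cyclelen = 2; the two swap cycles then yield
 * { prog, -a, -b, file1, file2 }, preserving order inside each block.
 */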
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*/
static int parse_long_options(
char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too)
{
char *current_argv, *has_equal;
size_t current_argv_len;
int i, ambiguous, match;
#define IDENTICAL_INTERPRETATION(_x, _y) \
(long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \
&& long_options[(_x)].val == long_options[(_y)].val)
current_argv = place;
match = -1;
ambiguous = 0;
optind++;
if ((has_equal = strchr(current_argv, '=')) != NULL)
{
/* argument found (--option=arg) */
current_argv_len = has_equal - current_argv;
has_equal++;
}
else
current_argv_len = strlen(current_argv);
for (i = 0; long_options[i].name; i++)
{
/* find matching long option */
if (strncmp(current_argv, long_options[i].name, current_argv_len))
continue;
if (strlen(long_options[i].name) == current_argv_len)
{
/* exact match */
match = i;
ambiguous = 0;
break;
}
/*
* If this is a known short option, don't allow
* a partial match of a single character.
*/
if (short_too && current_argv_len == 1)
continue;
if (match == -1) /* partial match */
match = i;
else if (!IDENTICAL_INTERPRETATION(i, match))
ambiguous = 1;
}
if (ambiguous)
{
/* ambiguous abbreviation */
if (PRINT_ERROR)
warnx(ambig, (int) current_argv_len, current_argv);
optopt = 0;
return (BADCH);
}
if (match != -1)
{ /* option found */
if (long_options[match].has_arg == no_argument && has_equal)
{
if (PRINT_ERROR)
warnx(noarg, (int) current_argv_len, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
return (BADARG);
}
if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
{
if (has_equal)
optarg = has_equal;
else if (long_options[match].has_arg == required_argument)
{
/*
* optional argument doesn't use next nargv
*/
optarg = nargv[optind++];
}
}
if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
{
/*
* Missing argument; leading ':' indicates no error
* should be generated.
*/
if (PRINT_ERROR)
warnx(recargstring, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
--optind;
return (BADARG);
}
}
else
{ /* unknown option */
if (short_too)
{
--optind;
return (-1);
}
if (PRINT_ERROR)
warnx(illoptstring, current_argv);
optopt = 0;
return (BADCH);
}
if (idx)
*idx = match;
if (long_options[match].flag)
{
*long_options[match].flag = long_options[match].val;
return (0);
}
else
return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
/*
* getopt_internal --
* Parse argc/argv argument vector. Called by user level routines.
*/
static int getopt_internal(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags)
{
const char* oli; /* option letter list index */
int optchar, short_too;
static int posixly_correct = -1;
if (options == NULL)
return (-1);
/*
* XXX Some GNU programs (like cvs) set optind to 0 instead of
* XXX using optreset. Work around this braindamage.
*/
if (optind == 0)
optind = optreset = 1;
/*
* Disable GNU extensions if POSIXLY_CORRECT is set or options
* string begins with a '+'.
*
* CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
* optreset != 0 for GNU compatibility.
*/
if (posixly_correct == -1 || optreset != 0)
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
if (*options == '-')
flags |= FLAG_ALLARGS;
else if (posixly_correct || *options == '+')
flags &= ~FLAG_PERMUTE;
if (*options == '+' || *options == '-')
options++;
optarg = NULL;
if (optreset)
nonopt_start = nonopt_end = -1;
start:
if (optreset || !*place)
{ /* update scanning pointer */
optreset = 0;
if (optind >= nargc)
{ /* end of argument vector */
place = EMSG;
if (nonopt_end != -1)
{
/* do permutation, if we have to */
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
else if (nonopt_start != -1)
{
/*
* If we skipped non-options, set optind
* to the first of them.
*/
optind = nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
{
place = EMSG; /* found non-option */
if (flags & FLAG_ALLARGS)
{
/*
* GNU extension:
* return non-option as argument to option 1
*/
optarg = nargv[optind++];
return (INORDER);
}
if (!(flags & FLAG_PERMUTE))
{
/*
* If no permutation wanted, stop parsing
* at first non-option.
*/
return (-1);
}
/* do permutation */
if (nonopt_start == -1)
nonopt_start = optind;
else if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
nonopt_start = optind - (nonopt_end - nonopt_start);
nonopt_end = -1;
}
optind++;
/* process next argument */
goto start;
}
if (nonopt_start != -1 && nonopt_end == -1)
nonopt_end = optind;
/*
* If we have "-" do nothing, if "--" we are done.
*/
if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
{
optind++;
place = EMSG;
/*
* We found an option (--), so if we skipped
* non-options, we have to permute.
*/
if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
}
/*
* Check long options if:
* 1) we were passed some
* 2) the arg is not just "-"
* 3) either the arg starts with -- or we are getopt_long_only()
*/
if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
{
short_too = 0;
if (*place == '-')
place++; /* --foo long option */
else if (*place != ':' && strchr(options, *place) != NULL)
short_too = 1; /* could be short option too */
optchar = parse_long_options(nargv, options, long_options, idx, short_too);
if (optchar != -1)
{
place = EMSG;
return (optchar);
}
}
if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
|| (oli = strchr(options, optchar)) == NULL)
{
/*
* If the user specified "-" and '-' isn't listed in
* options, return -1 (non-option) as per POSIX.
* Otherwise, it is an unknown option character (or ':').
*/
if (optchar == (int) '-' && *place == '\0')
return (-1);
if (!*place)
++optind;
if (PRINT_ERROR)
warnx(illoptchar, optchar);
optopt = optchar;
return (BADCH);
}
if (long_options != NULL && optchar == 'W' && oli[1] == ';')
{
/* -W long-option */
if (*place) /* no space */
/* NOTHING */;
else if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else /* white space */
place = nargv[optind];
optchar = parse_long_options(nargv, options, long_options, idx, 0);
place = EMSG;
return (optchar);
}
if (*++oli != ':')
{ /* doesn't take argument */
if (!*place)
++optind;
}
else
{ /* takes (optional) argument */
optarg = NULL;
if (*place) /* no white space */
optarg = place;
else if (oli[1] != ':')
{ /* arg not optional */
if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else
optarg = nargv[optind];
}
place = EMSG;
++optind;
}
/* dump back option letter */
return (optchar);
}
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int getopt(int nargc, char* const* nargv, const char* options)
{
/*
* We don't pass FLAG_PERMUTE to getopt_internal() since
* the BSD getopt(3) (unlike GNU) has never done this.
*
* Furthermore, since many privileged programs call getopt()
* before dropping privileges it makes sense to keep things
* as simple (and bug-free) as possible.
*/
return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */
/*
* getopt_long --
* Parse argc/argv argument vector.
*/
int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
}
/*
* getopt_long_only --
* Parse argc/argv argument vector.
*/
int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
}

View File

@@ -1,124 +0,0 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file has no copyright assigned and is placed in the Public Domain.
* This file is a part of the w64 mingw-runtime package.
*
* The w64 mingw-runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#define __GETOPT_H__
/* All the headers include this file. */
#include <crtdefs.h>
#if defined(WINGETOPT_SHARED_LIB)
#if defined(BUILDING_WINGETOPT_DLL)
#define WINGETOPT_API __declspec(dllexport)
#else
#define WINGETOPT_API __declspec(dllimport)
#endif
#else
#define WINGETOPT_API
#endif
#ifdef __cplusplus
extern "C"
{
#endif
WINGETOPT_API extern int optind; /* index of first non-option in argv */
WINGETOPT_API extern int optopt; /* single option character, as parsed */
WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
/* (user may set to zero, to suppress) */
WINGETOPT_API extern char* optarg; /* pointer to argument of current option */
extern int getopt(int nargc, char* const* nargv, const char* options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
#define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C"
{
#endif
struct option /* specification for a long form option... */
{
const char* name; /* option name, without leading hyphens */
int has_arg; /* does it take an argument? */
int* flag; /* where to save its status, or NULL */
int val; /* its associated status value */
};
enum /* permitted values for its `has_arg' field... */
{
no_argument = 0, /* option never takes an argument */
required_argument, /* option always requires an argument */
optional_argument /* option may take an argument */
};
extern int getopt_long(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
extern int getopt_long_only(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
#define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
}
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */

View File

@@ -13,12 +13,17 @@
 // limitations under the License.
 #include "fastdeploy/backends/tensorrt/trt_backend.h"
+#include <cstring>
+#include "NvInferSafeRuntime.h"
 #include "fastdeploy/utils/utils.h"
 #ifdef ENABLE_PADDLE_FRONTEND
 #include "paddle2onnx/converter.h"
 #endif
 namespace fastdeploy {
+FDTrtLogger* FDTrtLogger::logger = nullptr;
 size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
   if (dtype == nvinfer1::DataType::kFLOAT) {
     return sizeof(float);
@@ -130,8 +135,8 @@ bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
   fin.seekg(0, std::ios::beg);
   fin.read(&(engine_buffer.at(0)), engine_buffer.size());
   fin.close();
-  SampleUniquePtr<IRuntime> runtime{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
+  FDUniquePtr<nvinfer1::IRuntime> runtime{
+      nvinfer1::createInferRuntime(*FDTrtLogger::Get())};
   if (!runtime) {
     FDERROR << "Failed to call createInferRuntime()." << std::endl;
     return false;
@@ -139,7 +144,7 @@ bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
   engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
       runtime->deserializeCudaEngine(engine_buffer.data(),
                                      engine_buffer.size()),
-      samplesCommon::InferDeleter());
+      FDInferDeleter());
   if (!engine_) {
     FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
     return false;
@@ -320,10 +325,10 @@ void TrtBackend::GetInputOutputInfo() {
     auto dtype = engine_->getBindingDataType(i);
     if (engine_->bindingIsInput(i)) {
       inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
-      inputs_buffer_[name] = DeviceBuffer(dtype);
+      inputs_buffer_[name] = FDDeviceBuffer(dtype);
     } else {
       outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
-      outputs_buffer_[name] = DeviceBuffer(dtype);
+      outputs_buffer_[name] = FDDeviceBuffer(dtype);
     }
   }
   bindings_.resize(num_binds);
@@ -334,7 +339,7 @@ void TrtBackend::AllocateBufferInDynamicShape(
   for (const auto& item : inputs) {
     auto idx = engine_->getBindingIndex(item.name.c_str());
     std::vector<int> shape(item.shape.begin(), item.shape.end());
-    auto dims = sample::toDims(shape);
+    auto dims = ToDims(shape);
     context_->setBindingDimensions(idx, dims);
     if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
       inputs_buffer_[item.name].resize(dims);
@@ -357,7 +362,7 @@ void TrtBackend::AllocateBufferInDynamicShape(
       (*outputs)[ori_idx].shape.assign(output_dims.d,
                                        output_dims.d + output_dims.nbDims);
       (*outputs)[ori_idx].name = outputs_desc_[i].name;
-      (*outputs)[ori_idx].data.resize(volume(output_dims) *
+      (*outputs)[ori_idx].data.resize(Volume(output_dims) *
                                       TrtDataTypeSize(outputs_desc_[i].dtype));
       if ((*outputs)[ori_idx].Nbytes() >
           outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
@@ -373,19 +378,19 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
       1U << static_cast<uint32_t>(
           nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
-  builder_ = SampleUniquePtr<nvinfer1::IBuilder>(
-      nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+  builder_ = FDUniquePtr<nvinfer1::IBuilder>(
+      nvinfer1::createInferBuilder(*FDTrtLogger::Get()));
   if (!builder_) {
     FDERROR << "Failed to call createInferBuilder()." << std::endl;
     return false;
   }
-  network_ = SampleUniquePtr<nvinfer1::INetworkDefinition>(
+  network_ = FDUniquePtr<nvinfer1::INetworkDefinition>(
       builder_->createNetworkV2(explicitBatch));
   if (!network_) {
     FDERROR << "Failed to call createNetworkV2()." << std::endl;
     return false;
   }
-  auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(
+  auto config = FDUniquePtr<nvinfer1::IBuilderConfig>(
       builder_->createBuilderConfig());
   if (!config) {
     FDERROR << "Failed to call createBuilderConfig()." << std::endl;
@@ -402,8 +407,8 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
     }
   }
-  parser_ = SampleUniquePtr<nvonnxparser::IParser>(
-      nvonnxparser::createParser(*network_, sample::gLogger.getTRTLogger()));
+  parser_ = FDUniquePtr<nvonnxparser::IParser>(
+      nvonnxparser::createParser(*network_, *FDTrtLogger::Get()));
   if (!parser_) {
     FDERROR << "Failed to call createParser()." << std::endl;
     return false;
@@ -429,7 +434,7 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
       // set min shape
       FDASSERT(profile->setDimensions(item.first.c_str(),
                                       nvinfer1::OptProfileSelector::kMIN,
-                                      sample::toDims(item.second)),
+                                      ToDims(item.second)),
               "[TrtBackend] Failed to set min_shape for input: %s in TrtBackend.", item.first.c_str());
       // set optimization shape
@@ -438,7 +443,7 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
               "[TrtBackend] Cannot find input name: %s in TrtBackendOption::opt_shape.", item.first.c_str());
       FDASSERT(profile->setDimensions(item.first.c_str(),
                                       nvinfer1::OptProfileSelector::kOPT,
-                                      sample::toDims(iter->second)),
+                                      ToDims(iter->second)),
               "[TrtBackend] Failed to set opt_shape for input: %s in TrtBackend.", item.first.c_str());
       // set max shape
       iter = option.max_shape.find(item.first);
@@ -446,21 +451,21 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
               "[TrtBackend] Cannot find input name: %s in TrtBackendOption::max_shape.", item.first);
       FDASSERT(profile->setDimensions(item.first.c_str(),
                                       nvinfer1::OptProfileSelector::kMAX,
-                                      sample::toDims(iter->second)),
+                                      ToDims(iter->second)),
               "[TrtBackend] Failed to set max_shape for input: %s in TrtBackend.", item.first);
     }
     config->addOptimizationProfile(profile);
   }
-  SampleUniquePtr<IHostMemory> plan{
+  FDUniquePtr<nvinfer1::IHostMemory> plan{
       builder_->buildSerializedNetwork(*network_, *config)};
   if (!plan) {
     FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
     return false;
   }
-  SampleUniquePtr<IRuntime> runtime{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
+  FDUniquePtr<nvinfer1::IRuntime> runtime{
+      nvinfer1::createInferRuntime(*FDTrtLogger::Get())};
   if (!runtime) {
     FDERROR << "Failed to call createInferRuntime()." << std::endl;
     return false;
@@ -468,7 +473,7 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
   engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
       runtime->deserializeCudaEngine(plan->data(), plan->size()),
-      samplesCommon::InferDeleter());
+      FDInferDeleter());
   if (!engine_) {
     FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
     return false;

View File

@@ -20,19 +20,12 @@
 #include <vector>
 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/backends/tensorrt/utils.h"
-#include "fastdeploy/backends/tensorrt/common/argsParser.h"
-#include "fastdeploy/backends/tensorrt/common/buffers.h"
-#include "fastdeploy/backends/tensorrt/common/common.h"
-#include "fastdeploy/backends/tensorrt/common/logger.h"
-#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
-#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
 #include <cuda_runtime_api.h>
+#include "NvOnnxParser.h"
 #include "NvInfer.h"
 namespace fastdeploy {
-using namespace samplesCommon;
 struct TrtValueInfo {
   std::string name;
@@ -86,15 +79,15 @@ class TrtBackend : public BaseBackend {
 private:
   std::shared_ptr<nvinfer1::ICudaEngine> engine_;
   std::shared_ptr<nvinfer1::IExecutionContext> context_;
-  SampleUniquePtr<nvonnxparser::IParser> parser_;
-  SampleUniquePtr<nvinfer1::IBuilder> builder_;
-  SampleUniquePtr<nvinfer1::INetworkDefinition> network_;
+  FDUniquePtr<nvonnxparser::IParser> parser_;
+  FDUniquePtr<nvinfer1::IBuilder> builder_;
+  FDUniquePtr<nvinfer1::INetworkDefinition> network_;
   cudaStream_t stream_{};
   std::vector<void*> bindings_;
   std::vector<TrtValueInfo> inputs_desc_;
   std::vector<TrtValueInfo> outputs_desc_;
-  std::map<std::string, DeviceBuffer> inputs_buffer_;
-  std::map<std::string, DeviceBuffer> outputs_buffer_;
+  std::map<std::string, FDDeviceBuffer> inputs_buffer_;
+  std::map<std::string, FDDeviceBuffer> outputs_buffer_;
   // Sometimes while the number of outputs > 1
   // the output order of tensorrt may not be same

View File

@@ -0,0 +1,199 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <algorithm>
#include <cuda_runtime_api.h>
#include "NvInfer.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
struct FDInferDeleter {
template<typename T> void operator()(T* obj) const {
delete obj;
}
};
template<typename T> using FDUniquePtr = std::unique_ptr<T, FDInferDeleter>;
inline uint32_t GetElementSize(nvinfer1::DataType t) noexcept {
switch (t) {
case nvinfer1::DataType::kINT32:
return 4;
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
inline int64_t Volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
}
inline nvinfer1::Dims ToDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
FDWARNING << "Vector too long, only first 8 elements are used in dimension." << std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
public:
//!
//! \brief Construct an empty buffer.
//!
explicit FDGenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
FDGenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size), mCapacity(size), mType(type) {
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc();
}
}
FDGenericBuffer(FDGenericBuffer&& buf)
: mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
mBuffer(buf.mBuffer) {
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
FDGenericBuffer& operator=(FDGenericBuffer&& buf) {
if (this != &buf) {
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data() { return mBuffer; }
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const { return mBuffer; }
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const { return mSize; }
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const {
return this->size() * GetElementSize(mType);
}
//!
  //! \brief Resizes the buffer. Reallocation occurs only when the new size
  //! exceeds the current capacity; otherwise only the logical size changes.
//!
void resize(size_t newSize) {
mSize = newSize;
if (mCapacity < newSize) {
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims) {
return this->resize(Volume(dims));
}
~FDGenericBuffer() { freeFn(mBuffer); }
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class FDDeviceAllocator {
public:
bool operator()(void** ptr, size_t size) const {
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class FDDeviceFree {
public:
void operator()(void* ptr) const { cudaFree(ptr); }
};
using FDDeviceBuffer = FDGenericBuffer<FDDeviceAllocator, FDDeviceFree>;
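// Usage sketch (illustrative): size a device buffer from binding dims and
// grow it lazily; resize() only reallocates when capacity is exceeded.
//   FDDeviceBuffer buf(Volume(ToDims({1, 3, 224, 224})),
//                      nvinfer1::DataType::kFLOAT);
//   buf.resize(ToDims({4, 3, 224, 224}));  // larger volume: reallocates
//   buf.resize(ToDims({2, 3, 224, 224}));  // fits in capacity: no realloc
//   bindings[idx] = buf.data();            // raw device pointer for TensorRT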
class FDTrtLogger : public nvinfer1::ILogger {
public:
static FDTrtLogger* logger;
static FDTrtLogger* Get() {
if (logger != nullptr) {
return logger;
}
logger = new FDTrtLogger();
return logger;
}
void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override {
if (severity == nvinfer1::ILogger::Severity::kINFO) {
FDINFO << msg << std::endl;
} else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
FDWARNING << msg << std::endl;
} else if (severity == nvinfer1::ILogger::Severity::kERROR) {
FDERROR << msg << std::endl;
} else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
FDASSERT(false, "%s", msg);
}
}
};
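// As the trt_backend.cc changes above show, this singleton is what gets
// handed to TensorRT's factory entry points, e.g.:
//   auto* runtime = nvinfer1::createInferRuntime(*FDTrtLogger::Get());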
} // namespace fastdeploy

View File

@@ -122,7 +122,7 @@ void FDTensor::PrintInfo(const std::string& prefix) {
         "PrintInfo function doesn't support current situation, maybe you "
         "need enhance this function now.");
   }
-  std::cout << prefix << ": shape=";
+  std::cout << prefix << ": name=" << name << ", shape=";
   for (int i = 0; i < shape.size(); ++i) {
     std::cout << shape[i] << " ";
   }