Remove tensorrt/common codes (#171)

Jason
2022-08-29 19:21:18 +08:00
committed by GitHub
parent 00e3a4ad99
commit 737b62a2ba
33 changed files with 232 additions and 14432 deletions


@@ -1,342 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BATCH_STREAM_H
#define BATCH_STREAM_H
#include "NvInfer.h"
#include "common.h"
#include <algorithm>
#include <stdio.h>
#include <vector>
class IBatchStream {
public:
virtual void reset(int firstBatch) = 0;
virtual bool next() = 0;
virtual void skip(int skipCount) = 0;
virtual float* getBatch() = 0;
virtual float* getLabels() = 0;
virtual int getBatchesRead() const = 0;
virtual int getBatchSize() const = 0;
virtual nvinfer1::Dims getDims() const = 0;
};
class MNISTBatchStream : public IBatchStream {
public:
MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
const std::string& labelsFile,
const std::vector<std::string>& directories)
: mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
//!< We already know the dimensions of MNIST images.
{
readDataFile(locateFile(dataFile, directories));
readLabelsFile(locateFile(labelsFile, directories));
}
void reset(int firstBatch) override { mBatchCount = firstBatch; }
bool next() override {
if (mBatchCount >= mMaxBatches) {
return false;
}
++mBatchCount;
return true;
}
void skip(int skipCount) override { mBatchCount += skipCount; }
float* getBatch() override {
return mData.data() +
(mBatchCount * mBatchSize * samplesCommon::volume(mDims));
}
float* getLabels() override {
return mLabels.data() + (mBatchCount * mBatchSize);
}
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override {
return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
}
private:
void readDataFile(const std::string& dataFilePath) {
std::ifstream file{dataFilePath.c_str(), std::ios::binary};
int magicNumber, numImages, imageH, imageW;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2051 &&
"Magic Number does not match the expected value for an MNIST image "
"set");
// Read number of images and dimensions
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
numImages = samplesCommon::swapEndianness(numImages);
imageH = samplesCommon::swapEndianness(imageH);
imageW = samplesCommon::swapEndianness(imageW);
// The MNIST data is made up of unsigned bytes, so we need to cast to float
// and normalize.
int numElements = numImages * imageH * imageW;
std::vector<uint8_t> rawData(numElements);
file.read(reinterpret_cast<char*>(rawData.data()),
numElements * sizeof(uint8_t));
mData.resize(numElements);
std::transform(rawData.begin(), rawData.end(), mData.begin(),
[](uint8_t val) { return static_cast<float>(val) / 255.f; });
}
void readLabelsFile(const std::string& labelsFilePath) {
std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
int magicNumber, numImages;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2049 &&
"Magic Number does not match the expected value for an MNIST labels "
"file");
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
numImages = samplesCommon::swapEndianness(numImages);
std::vector<uint8_t> rawLabels(numImages);
file.read(reinterpret_cast<char*>(rawLabels.data()),
numImages * sizeof(uint8_t));
mLabels.resize(numImages);
std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
[](uint8_t val) { return static_cast<float>(val); });
}
int mBatchSize{0};
int mBatchCount{
0}; //!< The batch that will be read on the next invocation of next()
int mMaxBatches{0};
Dims mDims{};
std::vector<float> mData{};
std::vector<float> mLabels{};
};
class BatchStream : public IBatchStream {
public:
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::string suffix, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
mSuffix(suffix), mDataDir(directories) {
FILE* file = fopen(
locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
"rb");
ASSERT(file != nullptr);
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
mDims.nbDims = 4; // The number of dimensions.
mDims.d[0] = d[0]; // Batch Size
mDims.d[1] = d[1]; // Channels
mDims.d[2] = d[2]; // Height
mDims.d[3] = d[3]; // Width
ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
mDims.d[3] > 0);
fclose(file);
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::vector<std::string> directories)
: BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}
BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
std::string listFile, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
mListFile(listFile), mDataDir(directories) {
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
// Resets data members
void reset(int firstBatch) override {
mBatchCount = 0;
mFileCount = 0;
mFileBatchPos = mDims.d[0];
skip(firstBatch);
}
// Advance to next batch and return true, or return false if there is no batch
// left.
bool next() override {
if (mBatchCount == mMaxBatches) {
return false;
}
for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
batchPos += csize, mFileBatchPos += csize) {
ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
if (mFileBatchPos == mDims.d[0] && !update()) {
return false;
}
// copy the smaller of: elements left to fulfill the request, or elements
// left in the file buffer.
csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
csize * mImageSize, getBatch() + batchPos * mImageSize);
std::copy_n(getFileLabels() + mFileBatchPos, csize,
getLabels() + batchPos);
}
mBatchCount++;
return true;
}
// Skips the batches
void skip(int skipCount) override {
if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
mFileBatchPos == mDims.d[0]) {
mFileCount += skipCount * mBatchSize / mDims.d[0];
return;
}
int x = mBatchCount;
for (int i = 0; i < skipCount; i++) {
next();
}
mBatchCount = x;
}
float* getBatch() override { return mBatch.data(); }
float* getLabels() override { return mLabels.data(); }
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override { return mDims; }
private:
float* getFileBatch() { return mFileBatch.data(); }
float* getFileLabels() { return mFileLabels.data(); }
bool update() {
if (mListFile.empty()) {
std::string inputFileName = locateFile(
mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
FILE* file = fopen(inputFileName.c_str(), "rb");
if (!file) {
return false;
}
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
mDims.d[3] == d[3]);
size_t readInputCount =
fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
size_t readLabelCount =
fread(getFileLabels(), sizeof(float), mDims.d[0], file);
ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));
fclose(file);
} else {
std::vector<std::string> fNames;
std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
if (!file) {
return false;
}
sample::gLogInfo << "Batch #" << mFileCount << std::endl;
file.seekg(((mBatchCount * mBatchSize)) * 7);
for (int i = 1; i <= mBatchSize; i++) {
std::string sName;
std::getline(file, sName);
sName = sName + ".ppm";
sample::gLogInfo << "Calibrating with file " << sName << std::endl;
fNames.emplace_back(sName);
}
mFileCount++;
const int imageC = 3;
const int imageH = 300;
const int imageW = 300;
std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
fNames.size());
for (uint32_t i = 0; i < fNames.size(); ++i) {
readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
}
std::vector<float> data(samplesCommon::volume(mDims));
const float scale = 2.0 / 255.0;
const float bias = 1.0;
long int volChl = mDims.d[2] * mDims.d[3];
// Normalize input data
for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
i < mBatchSize; ++i) {
for (int c = 0; c < mDims.d[1]; ++c) {
for (int j = 0; j < volChl; ++j) {
data[i * volImg + c * volChl + j] =
scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
}
}
}
std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
}
mFileBatchPos = 0;
return true;
}
int mBatchSize{0};
int mMaxBatches{0};
int mBatchCount{0};
int mFileCount{0};
int mFileBatchPos{0};
int mImageSize{0};
std::vector<float> mBatch; //!< Data for the batch
std::vector<float> mLabels; //!< Labels for the batch
std::vector<float> mFileBatch; //!< Data for the current batch file
std::vector<float> mFileLabels; //!< Labels for the current batch file
std::string mPrefix; //!< Batch file name prefix
std::string mSuffix; //!< Batch file name suffix
nvinfer1::Dims mDims; //!< Input dimensions
std::string mListFile; //!< File name of the list of image names
std::vector<std::string>
mDataDir; //!< Directories where the files can be found
};
#endif
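A minimal usage sketch of the IBatchStream interface above; the batch size, batch count, file names, and data directory are hypothetical, following the MNIST sample layout:

// Sketch: iterate calibration batches from hypothetical MNIST files.
#include "BatchStream.h"
int main() {
  std::vector<std::string> dirs{"data/mnist/"};
  MNISTBatchStream stream(32 /*batchSize*/, 10 /*maxBatches*/,
                          "train-images-idx3-ubyte",
                          "train-labels-idx1-ubyte", dirs);
  stream.reset(0);
  while (stream.next()) {
    float* batch = stream.getBatch();   // batchSize * 1 * 28 * 28 floats
    float* labels = stream.getLabels(); // batchSize floats
    // Feed batch/labels to a calibrator or an accuracy check here.
  }
  return 0;
}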


@@ -1 +0,0 @@
exclude_files=.*


@@ -1,118 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H
#include "BatchStream.h"
#include "NvInfer.h"
//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
template <typename TBatchStream> class EntropyCalibratorImpl {
public:
EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
std::string networkName, const char* inputBlobName,
bool readCache = true)
: mStream{stream},
mCalibrationTableName("CalibrationTable" + networkName),
mInputBlobName(inputBlobName), mReadCache(readCache) {
nvinfer1::Dims dims = mStream.getDims();
mInputCount = samplesCommon::volume(dims);
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
mStream.reset(firstBatch);
}
virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }
int getBatchSize() const noexcept { return mStream.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept {
if (!mStream.next()) {
return false;
}
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
mInputCount * sizeof(float), cudaMemcpyHostToDevice));
ASSERT(!strcmp(names[0], mInputBlobName));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) noexcept {
mCalibrationCache.clear();
std::ifstream input(mCalibrationTableName, std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good()) {
std::copy(std::istream_iterator<char>(input),
std::istream_iterator<char>(),
std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? mCalibrationCache.data() : nullptr;
}
void writeCalibrationCache(const void* cache, size_t length) noexcept {
std::ofstream output(mCalibrationTableName, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
private:
TBatchStream mStream;
size_t mInputCount;
std::string mCalibrationTableName;
const char* mInputBlobName;
bool mReadCache{true};
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache;
};
//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
template <typename TBatchStream>
class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
const char* networkName, const char* inputBlobName,
bool readCache = true)
: mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}
int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept override {
return mImpl.getBatch(bindings, names, nbBindings);
}
const void* readCalibrationCache(size_t& length) noexcept override {
return mImpl.readCalibrationCache(length);
}
void writeCalibrationCache(const void* cache,
size_t length) noexcept override {
mImpl.writeCalibrationCache(cache, length);
}
private:
EntropyCalibratorImpl<TBatchStream> mImpl;
};
#endif // ENTROPY_CALIBRATOR_H
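A sketch of how the calibrator above is wired into an INT8 build. The helper function name, network name, input blob name, and stream parameters are hypothetical; the static locals are one way to keep the calibrator alive while the engine is built:

// Sketch: enable INT8 and attach the entropy calibrator.
void enableInt8Calibration(nvinfer1::IBuilderConfig* config) {
  static MNISTBatchStream stream(32, 10, "train-images-idx3-ubyte",
                                 "train-labels-idx1-ubyte", {"data/mnist/"});
  static Int8EntropyCalibrator2<MNISTBatchStream> calibrator(
      stream, 0 /*firstBatch*/, "mnist" /*networkName*/, "data" /*inputBlob*/);
  config->setFlag(nvinfer1::BuilderFlag::kINT8);
  config->setInt8Calibrator(&calibrator); // must outlive the engine build
}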


@@ -1,115 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ERROR_RECORDER_H
#define ERROR_RECORDER_H
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include <atomic>
#include <cstdint>
#include <exception>
#include <mutex>
#include <vector>
using nvinfer1::ErrorCode;
using nvinfer1::IErrorRecorder;
//!
//! A simple implementation of the IErrorRecorder interface for
//! use by samples. This interface also can be used as a reference
//! implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutexes and atomics in order to make sure that the code
//! works in a multi-threaded environment.
//!
class SampleErrorRecorder : public IErrorRecorder {
using errorPair = std::pair<ErrorCode, std::string>;
using errorStack = std::vector<errorPair>;
public:
SampleErrorRecorder() = default;
virtual ~SampleErrorRecorder() noexcept {}
int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
: (*this)[errorIdx].first;
};
IErrorRecorder::ErrorDesc
getErrorDesc(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
: (*this)[errorIdx].second.c_str();
}
// This class can never overflow since we have dynamic resize via std::vector
// usage.
bool hasOverflowed() const noexcept final { return false; }
// Empty the errorStack.
void clear() noexcept final {
try {
// grab a lock so that there is no addition while clearing.
std::lock_guard<std::mutex> guard(mStackLock);
mErrorStack.clear();
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
};
//! Simple helper function that returns true if the error stack is empty.
bool empty() const noexcept { return mErrorStack.empty(); }
bool reportError(ErrorCode val,
IErrorRecorder::ErrorDesc desc) noexcept final {
try {
std::lock_guard<std::mutex> guard(mStackLock);
sample::gLogError << "Error[" << static_cast<int32_t>(val)
<< "]: " << desc << std::endl;
mErrorStack.push_back(errorPair(val, desc));
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
// All errors are considered fatal.
return true;
}
// Atomically increment or decrement the ref counter.
IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }
private:
// Simple helper functions.
const errorPair& operator[](size_t index) const noexcept {
return mErrorStack[index];
}
bool invalidIndexCheck(int32_t index) const noexcept {
// By converting signed to unsigned, we only need a single check since
// negative numbers turn into large positive greater than the size.
size_t sIndex = index;
return sIndex >= mErrorStack.size();
}
// Mutex to hold when locking mErrorStack.
std::mutex mStackLock;
// Reference count of the class. Destruction of the class when mRefCount
// is not zero causes undefined behavior.
std::atomic<int32_t> mRefCount{0};
// The error stack that holds the errors recorded by TensorRT.
errorStack mErrorStack;
}; // class SampleErrorRecorder
#endif // ERROR_RECORDER_H
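A sketch of the intended registration, shown here on an IRuntime created with the sample logger (the wrapping function is illustrative); the same setErrorRecorder call also exists on the builder:

// Sketch: route TensorRT errors through the recorder above.
void runWithErrorRecorder() {
  SampleErrorRecorder recorder;
  auto runtime = std::unique_ptr<nvinfer1::IRuntime>(
      nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
  runtime->setErrorRecorder(&recorder);
  // ... deserialize an engine and run inference ...
  for (int32_t i = 0; i < recorder.getNbErrors(); ++i) {
    sample::gLogError << recorder.getErrorDesc(i) << std::endl;
  }
  recorder.clear();
}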


@@ -1 +0,0 @@
The code in this directory originates from https://github.com/NVIDIA/TensorRT


@@ -1,169 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_ARGS_PARSER_H
#define TENSORRT_ARGS_PARSER_H
#include <string>
#include <vector>
#ifdef _MSC_VER
#include ".\windows\getopt.h"
#else
#include <getopt.h>
#endif
#include <iostream>
namespace samplesCommon {
//!
//! \brief The SampleParams structure groups the basic parameters required by
//! all sample networks.
//!
struct SampleParams {
int32_t batchSize{1}; //!< Number of inputs in a batch
int32_t dlaCore{-1}; //!< Specify the DLA core to run network on.
bool int8{false}; //!< Allow running the network in Int8 mode.
bool fp16{false}; //!< Allow running the network in FP16 mode.
std::vector<std::string>
dataDirs; //!< Directory paths where sample data files are stored
std::vector<std::string> inputTensorNames;
std::vector<std::string> outputTensorNames;
};
//!
//! \brief The CaffeSampleParams structure groups the additional parameters
//! required by
//! networks that use caffe
//!
struct CaffeSampleParams : public SampleParams {
std::string
prototxtFileName; //!< Filename of prototxt design file of a network
std::string
weightsFileName; //!< Filename of trained weights file of a network
std::string meanFileName; //!< Filename of mean file of a network
};
//!
//! \brief The OnnxSampleParams structure groups the additional parameters
//! required by
//! networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams {
std::string onnxFileName; //!< Filename of ONNX file of a network
};
//!
//! \brief The UffSampleParams structure groups the additional parameters
//! required by
//! networks that use Uff
//!
struct UffSampleParams : public SampleParams {
std::string uffFileName; //!< Filename of uff file of a network
};
//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args {
bool runInInt8{false};
bool runInFp16{false};
bool help{false};
int32_t useDLACore{-1};
int32_t batch{1};
std::vector<std::string> dataDirs;
std::string saveEngine;
std::string loadEngine;
bool useILoop{false};
};
//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return true if execution can continue; otherwise the program should exit
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
while (1) {
int32_t arg;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"datadir", required_argument, 0, 'd'},
{"int8", no_argument, 0, 'i'},
{"fp16", no_argument, 0, 'f'},
{"useILoop", no_argument, 0, 'l'},
{"saveEngine", required_argument, 0, 's'},
{"loadEngine", no_argument, 0, 'o'},
{"useDLACore", required_argument, 0, 'u'},
{"batch", required_argument, 0, 'b'},
{nullptr, 0, nullptr, 0}};
int32_t option_index = 0;
arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
if (arg == -1) {
break;
}
switch (arg) {
case 'h':
args.help = true;
return true;
case 'd':
if (optarg) {
args.dataDirs.push_back(optarg);
} else {
std::cerr << "ERROR: --datadir requires option argument" << std::endl;
return false;
}
break;
case 's':
if (optarg) {
args.saveEngine = optarg;
}
break;
case 'o':
if (optarg) {
args.loadEngine = optarg;
}
break;
case 'i':
args.runInInt8 = true;
break;
case 'f':
args.runInFp16 = true;
break;
case 'l':
args.useILoop = true;
break;
case 'u':
if (optarg) {
args.useDLACore = std::stoi(optarg);
}
break;
case 'b':
if (optarg) {
args.batch = std::stoi(optarg);
}
break;
default:
return false;
}
}
return true;
}
} // namespace samplesCommon
#endif // TENSORRT_ARGS_PARSER_H
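A sketch of the call pattern parseArgs expects in a sample's main(); the usage text is illustrative only:

// Sketch: populate Args from the command line.
int main(int argc, char** argv) {
  samplesCommon::Args args;
  if (!samplesCommon::parseArgs(args, argc, argv)) {
    std::cerr << "Invalid arguments" << std::endl;
    return 1;
  }
  if (args.help) {
    std::cout << "Usage: sample [-h] [-d DIR] [--int8] [--fp16] [--useDLACore=N]"
              << std::endl;
    return 0;
  }
  // args.dataDirs, args.runInInt8, args.batch, ... are now populated.
  return 0;
}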


@@ -1,426 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_BUFFERS_H
#define TENSORRT_BUFFERS_H
#include "NvInfer.h"
#include "common.h"
#include "half.h"
#include <cassert>
#include <cuda_runtime_api.h>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <vector>
namespace samplesCommon {
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class
//! handles the allocation,
//! deallocation, querying of buffers on both the device and the host.
//! It can handle data of arbitrary types because it stores byte
//! buffers.
//! The template parameters AllocFunc and FreeFunc are used for the
//! allocation and deallocation of the buffer.
//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
//! and returns bool. ptr is a pointer to where the allocated buffer
//! address should be stored.
//! size is the amount of memory in bytes to allocate.
//! The boolean indicates whether or not the memory allocation was
//! successful.
//! FreeFunc must be a functor that takes in (void* ptr) and returns
//! void.
//! ptr is the allocated buffer address. It must work with nullptr
//! input.
//!
template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
public:
//!
//! \brief Construct an empty buffer.
//!
GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
GenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size), mCapacity(size), mType(type) {
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc();
}
}
GenericBuffer(GenericBuffer&& buf)
: mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
mBuffer(buf.mBuffer) {
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
GenericBuffer& operator=(GenericBuffer&& buf) {
if (this != &buf) {
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data() { return mBuffer; }
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const { return mBuffer; }
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const { return mSize; }
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const {
return this->size() * samplesCommon::getElementSize(mType);
}
//!
//! \brief Resizes the buffer. This is a no-op if the new size is smaller than
//! or equal to the current capacity.
//!
void resize(size_t newSize) {
mSize = newSize;
if (mCapacity < newSize) {
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims) {
return this->resize(samplesCommon::volume(dims));
}
~GenericBuffer() { freeFn(mBuffer); }
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class DeviceAllocator {
public:
bool operator()(void** ptr, size_t size) const {
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class DeviceFree {
public:
void operator()(void* ptr) const { cudaFree(ptr); }
};
class HostAllocator {
public:
bool operator()(void** ptr, size_t size) const {
*ptr = malloc(size);
return *ptr != nullptr;
}
};
class HostFree {
public:
void operator()(void* ptr) const { free(ptr); }
};
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
//!
//! \brief The ManagedBuffer class groups together a pair of corresponding
//! device and host buffers.
//!
class ManagedBuffer {
public:
DeviceBuffer deviceBuffer;
HostBuffer hostBuffer;
};
//!
//! \brief The BufferManager class handles host and device buffer allocation
//! and deallocation.
//!
//! \details This RAII class handles host and device buffer allocation and
//! deallocation,
//! memcpy between host and device buffers to aid with inference,
//! and debugging dumps to validate inference. The BufferManager class
//! is meant to be
//! used to simplify buffer management and any interactions between
//! buffers and the engine.
//!
class BufferManager {
public:
static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
//!
//! \brief Create a BufferManager for handling buffer interactions with
//! engine.
//!
BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
const int batchSize = 0,
const nvinfer1::IExecutionContext* context = nullptr)
: mEngine(engine), mBatchSize(batchSize) {
// Full Dims implies no batch size.
assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
// Create host and device buffers
for (int i = 0; i < mEngine->getNbBindings(); i++) {
auto dims = context ? context->getBindingDimensions(i)
: mEngine->getBindingDimensions(i);
size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
nvinfer1::DataType type = mEngine->getBindingDataType(i);
int vecDim = mEngine->getBindingVectorizedDim(i);
if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
{
int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
vol *= scalarsPerVec;
}
vol *= samplesCommon::volume(dims);
std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
manBuf->deviceBuffer = DeviceBuffer(vol, type);
manBuf->hostBuffer = HostBuffer(vol, type);
mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
mManagedBuffers.emplace_back(std::move(manBuf));
}
}
//!
//! \brief Returns a vector of device buffers that you can use directly as
//! bindings for the execute and enqueue methods of IExecutionContext.
//!
std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }
//!
//! \brief Returns a vector of device buffers.
//!
const std::vector<void*>& getDeviceBindings() const {
return mDeviceBindings;
}
//!
//! \brief Returns the device buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getDeviceBuffer(const std::string& tensorName) const {
return getBuffer(false, tensorName);
}
//!
//! \brief Returns the host buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getHostBuffer(const std::string& tensorName) const {
return getBuffer(true, tensorName);
}
//!
//! \brief Returns the size of the host and device buffers that correspond to
//! tensorName.
//! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
//!
size_t size(const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return kINVALID_SIZE_VALUE;
return mManagedBuffers[index]->hostBuffer.nbBytes();
}
//!
//! \brief Dump host buffer with specified tensorName to ostream.
//! Prints error message to std::ostream if no such tensor can be
//! found.
//!
void dumpBuffer(std::ostream& os, const std::string& tensorName) {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1) {
os << "Invalid tensor name" << std::endl;
return;
}
void* buf = mManagedBuffers[index]->hostBuffer.data();
size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
size_t rowCount = static_cast<size_t>(
bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
int leadDim = mBatchSize;
int* trailDims = bufDims.d;
int nbDims = bufDims.nbDims;
// Fix explicit Dimension networks
if (!leadDim && nbDims > 0) {
leadDim = bufDims.d[0];
++trailDims;
--nbDims;
}
os << "[" << leadDim;
for (int i = 0; i < nbDims; i++)
os << ", " << trailDims[i];
os << "]" << std::endl;
switch (mEngine->getBindingDataType(index)) {
case nvinfer1::DataType::kINT32:
print<int32_t>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kFLOAT:
print<float>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kHALF:
print<half_float::half>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kINT8:
assert(0 && "Int8 network-level input and output is not supported");
break;
case nvinfer1::DataType::kBOOL:
assert(0 && "Bool network-level input and output are not supported");
break;
}
}
//!
//! \brief Templated print function that dumps buffers of arbitrary type to
//! std::ostream.
//! rowCount parameter controls how many elements are on each line.
//! A rowCount of 1 means that there is only 1 element on each line.
//!
template <typename T>
void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
assert(rowCount != 0);
assert(bufSize % sizeof(T) == 0);
T* typedBuf = static_cast<T*>(buf);
size_t numItems = bufSize / sizeof(T);
for (int i = 0; i < static_cast<int>(numItems); i++) {
// Handle rowCount == 1 case
if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
os << typedBuf[i] << std::endl;
else if (rowCount == 1)
os << typedBuf[i];
// Handle rowCount > 1 case
else if (i % rowCount == 0)
os << typedBuf[i];
else if (i % rowCount == rowCount - 1)
os << " " << typedBuf[i] << std::endl;
else
os << " " << typedBuf[i];
}
}
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! synchronously.
//!
void copyInputToDevice() { memcpyBuffers(true, false, false); }
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! synchronously.
//!
void copyOutputToHost() { memcpyBuffers(false, true, false); }
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! asynchronously.
//!
void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(true, false, true, stream);
}
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! asynchronously.
//!
void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(false, true, true, stream);
}
~BufferManager() = default;
private:
void* getBuffer(const bool isHost, const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return nullptr;
return (isHost ? mManagedBuffers[index]->hostBuffer.data()
: mManagedBuffers[index]->deviceBuffer.data());
}
void memcpyBuffers(const bool copyInput, const bool deviceToHost,
const bool async, const cudaStream_t& stream = 0) {
for (int i = 0; i < mEngine->getNbBindings(); i++) {
void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
: mManagedBuffers[i]->deviceBuffer.data();
const void* srcPtr = deviceToHost
? mManagedBuffers[i]->deviceBuffer.data()
: mManagedBuffers[i]->hostBuffer.data();
const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
const cudaMemcpyKind memcpyType =
deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
if ((copyInput && mEngine->bindingIsInput(i)) ||
(!copyInput && !mEngine->bindingIsInput(i))) {
if (async)
CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
else
CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
}
}
}
std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
std::vector<std::unique_ptr<ManagedBuffer>>
mManagedBuffers; //!< The vector of pointers to managed buffers
std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed
//! for engine execution
};
} // namespace samplesCommon
#endif // TENSORRT_BUFFERS_H
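A sketch of the host/device round trip BufferManager is built for, assuming an already-built engine and execution context with static shapes; the function name and the tensor names "input" and "output" are hypothetical:

// Sketch: copy inputs in, execute, copy outputs back.
void infer(std::shared_ptr<nvinfer1::ICudaEngine> engine,
           nvinfer1::IExecutionContext* context) {
  samplesCommon::BufferManager buffers(engine);
  float* hostInput = static_cast<float*>(buffers.getHostBuffer("input"));
  // ... fill hostInput with preprocessed data ...
  buffers.copyInputToDevice();
  bool status = context->executeV2(buffers.getDeviceBindings().data());
  buffers.copyOutputToHost();
  const float* hostOutput =
      static_cast<const float*>(buffers.getHostBuffer("output"));
  // ... check status, postprocess hostOutput ...
}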


@@ -1,844 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_COMMON_H
#define TENSORRT_COMMON_H
// For loadLibrary
#ifdef _MSC_VER
// Needed so that the max/min definitions in windows.h do not conflict with
// std::max/min.
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX
#else
#include <dlfcn.h>
#endif
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ratio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "safeCommon.h"
using namespace nvinfer1;
using namespace plugin;
#ifdef _MSC_VER
#define FN_NAME __FUNCTION__
#else
#define FN_NAME __func__
#endif
#if defined(__aarch64__) || defined(__QNX__)
#define ENABLE_DLA_API 1
#endif
#define CHECK_RETURN_W_MSG(status, val, errMsg) \
do { \
if (!(status)) { \
sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
<< FN_NAME << "(), line " << __LINE__ << std::endl; \
return val; \
} \
} while (0)
#undef ASSERT
#define ASSERT(condition) \
do { \
if (!(condition)) { \
sample::gLogError << "Assertion failure: " << #condition << std::endl; \
abort(); \
} \
} while (0)
#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A * t)>
template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
auto deleter = [](T* t) { t->destroy(); };
return std::unique_ptr<T, decltype(deleter)>{static_cast<T*>(t), deleter};
}
constexpr long double operator"" _GiB(long double val) {
return val * (1 << 30);
}
constexpr long double operator"" _MiB(long double val) {
return val * (1 << 20);
}
constexpr long double operator"" _KiB(long double val) {
return val * (1 << 10);
}
// These are necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
// Since the return type is signed, -1_GiB will work as expected.
constexpr long long int operator"" _GiB(unsigned long long val) {
return val * (1 << 30);
}
constexpr long long int operator"" _MiB(unsigned long long val) {
return val * (1 << 20);
}
constexpr long long int operator"" _KiB(unsigned long long val) {
return val * (1 << 10);
}
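// Example: 256_MiB == 268435456; handy wherever a byte count is expected.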
struct SimpleProfiler : public nvinfer1::IProfiler {
struct Record {
float time{0};
int count{0};
};
virtual void reportLayerTime(const char* layerName, float ms) noexcept {
mProfile[layerName].count++;
mProfile[layerName].time += ms;
if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) ==
mLayerNames.end()) {
mLayerNames.push_back(layerName);
}
}
SimpleProfiler(const char* name,
const std::vector<SimpleProfiler>& srcProfilers =
std::vector<SimpleProfiler>())
: mName(name) {
for (const auto& srcProfiler : srcProfilers) {
for (const auto& rec : srcProfiler.mProfile) {
auto it = mProfile.find(rec.first);
if (it == mProfile.end()) {
mProfile.insert(rec);
} else {
it->second.time += rec.second.time;
it->second.count += rec.second.count;
}
}
}
}
friend std::ostream& operator<<(std::ostream& out,
const SimpleProfiler& value) {
out << "========== " << value.mName << " profile ==========" << std::endl;
float totalTime = 0;
std::string layerNameStr = "TensorRT layer name";
int maxLayerNameLength =
std::max(static_cast<int>(layerNameStr.size()), 70);
for (const auto& elem : value.mProfile) {
totalTime += elem.second.time;
maxLayerNameLength =
std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
}
auto old_settings = out.flags();
auto old_precision = out.precision();
// Output header
{
out << std::setw(maxLayerNameLength) << layerNameStr << " ";
out << std::setw(12) << "Runtime, "
<< "%"
<< " ";
out << std::setw(12) << "Invocations"
<< " ";
out << std::setw(12) << "Runtime, ms" << std::endl;
}
for (size_t i = 0; i < value.mLayerNames.size(); i++) {
const std::string layerName = value.mLayerNames[i];
auto elem = value.mProfile.at(layerName);
out << std::setw(maxLayerNameLength) << layerName << " ";
out << std::setw(12) << std::fixed << std::setprecision(1)
<< (elem.time * 100.0F / totalTime) << "%"
<< " ";
out << std::setw(12) << elem.count << " ";
out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time
<< std::endl;
}
out.flags(old_settings);
out.precision(old_precision);
out << "========== " << value.mName << " total runtime = " << totalTime
<< " ms ==========" << std::endl;
return out;
}
private:
std::string mName;
std::vector<std::string> mLayerNames;
std::map<std::string, Record> mProfile;
};
//! Locate path to file, given its filename or filepath suffix and possible dirs
//! it might lie in.
//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a
//! file path.
inline std::string locateFile(const std::string& filepathSuffix,
const std::vector<std::string>& directories,
bool reportError = true) {
const int MAX_DEPTH{10};
bool found{false};
std::string filepath;
for (auto& dir : directories) {
if (!dir.empty() && dir.back() != '/') {
#ifdef _MSC_VER
filepath = dir + "\\" + filepathSuffix;
#else
filepath = dir + "/" + filepathSuffix;
#endif
} else {
filepath = dir + filepathSuffix;
}
for (int i = 0; i < MAX_DEPTH && !found; i++) {
const std::ifstream checkFile(filepath);
found = checkFile.is_open();
if (found) {
break;
}
filepath = "../" + filepath; // Try again in parent dir
}
if (found) {
break;
}
filepath.clear();
}
// Could not find the file
if (filepath.empty()) {
const std::string dirList = std::accumulate(
directories.begin() + 1, directories.end(), directories.front(),
[](const std::string& a, const std::string& b) {
return a + "\n\t" + b;
});
std::cout << "Could not find " << filepathSuffix
<< " in data directories:\n\t" << dirList << std::endl;
if (reportError) {
std::cout << "&&&& FAILED" << std::endl;
exit(EXIT_FAILURE);
}
}
return filepath;
}
inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH,
int inW) {
std::ifstream infile(fileName, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
std::string magic, h, w, max;
infile >> magic >> h >> w >> max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(buffer), inH * inW);
}
namespace samplesCommon {
// Swaps endianness of an integral type.
template <typename T,
typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T swapEndianness(const T& value) {
uint8_t bytes[sizeof(T)];
for (int i = 0; i < static_cast<int>(sizeof(T)); ++i) {
bytes[sizeof(T) - 1 - i] = *(reinterpret_cast<const uint8_t*>(&value) + i);
}
return *reinterpret_cast<T*>(bytes);
}
class HostMemory {
public:
HostMemory() = delete;
virtual void* data() const noexcept { return mData; }
virtual std::size_t size() const noexcept { return mSize; }
virtual DataType type() const noexcept { return mType; }
virtual ~HostMemory() {}
protected:
HostMemory(std::size_t size, DataType type)
: mData{nullptr}, mSize(size), mType(type) {}
void* mData;
std::size_t mSize;
DataType mType;
};
template <typename ElemType, DataType dataType>
class TypedHostMemory : public HostMemory {
public:
explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) {
mData = new ElemType[size];
};
~TypedHostMemory() noexcept { delete[] static_cast<ElemType*>(mData); }
ElemType* raw() noexcept { return static_cast<ElemType*>(data()); }
};
using FloatMemory = TypedHostMemory<float, DataType::kFLOAT>;
using HalfMemory = TypedHostMemory<uint16_t, DataType::kHALF>;
using ByteMemory = TypedHostMemory<uint8_t, DataType::kINT8>;
inline void* safeCudaMalloc(size_t memSize) {
void* deviceMem;
CHECK(cudaMalloc(&deviceMem, memSize));
if (deviceMem == nullptr) {
std::cerr << "Out of memory" << std::endl;
exit(1);
}
return deviceMem;
}
inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); }
struct InferDeleter {
template <typename T> void operator()(T* obj) const { delete obj; }
};
template <typename T> using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;
static auto StreamDeleter = [](cudaStream_t* pStream) {
if (pStream) {
cudaStreamDestroy(*pStream);
delete pStream;
}
};
inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream() {
std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(
new cudaStream_t, StreamDeleter);
if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) !=
cudaSuccess) {
pStream.reset(nullptr);
}
return pStream;
}
//! Return vector of indices that puts magnitudes of sequence in descending
//! order.
template <class Iter>
std::vector<size_t> argMagnitudeSort(Iter begin, Iter end) {
std::vector<size_t> indices(end - begin);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) {
return std::abs(begin[j]) < std::abs(begin[i]);
});
return indices;
}
inline bool readReferenceFile(const std::string& fileName,
std::vector<std::string>& refVector) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR: readReferenceFile: Attempting to read from a file "
"that is not open."
<< std::endl;
return false;
}
std::string line;
while (std::getline(infile, line)) {
if (line.empty())
continue;
refVector.push_back(line);
}
infile.close();
return true;
}
template <typename T>
std::vector<std::string> classify(const std::vector<std::string>& refVector,
const std::vector<T>& output,
const size_t topK) {
const auto inds =
samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
std::vector<std::string> result;
result.reserve(topK);
for (size_t k = 0; k < topK; ++k) {
result.push_back(refVector[inds[k]]);
}
return result;
}
// Returns indices of highest K magnitudes in v.
template <typename T>
std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k) {
std::vector<size_t> indices =
samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
indices.resize(k);
return indices;
}
template <typename T>
bool readASCIIFile(const std::string& fileName, const size_t size,
std::vector<T>& out) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR readASCIIFile: Attempting to read from a file that is "
"not open."
<< std::endl;
return false;
}
out.clear();
out.reserve(size);
out.assign(std::istream_iterator<T>(infile), std::istream_iterator<T>());
infile.close();
return true;
}
template <typename T>
bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in) {
std::ofstream outfile(fileName);
if (!outfile.is_open()) {
std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is "
"not open."
<< std::endl;
return false;
}
for (auto fn : in) {
outfile << fn << "\n";
}
outfile.close();
return true;
}
inline void print_version() {
std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "."
<< NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "."
<< NV_TENSORRT_BUILD << std::endl;
}
inline std::string getFileType(const std::string& filepath) {
return filepath.substr(filepath.find_last_of(".") + 1);
}
inline std::string toLower(const std::string& inp) {
std::string out = inp;
std::transform(out.begin(), out.end(), out.begin(), ::tolower);
return out;
}
inline float getMaxValue(const float* buffer, int64_t size) {
assert(buffer != nullptr);
assert(size > 0);
return *std::max_element(buffer, buffer + size);
}
// Ensures that every tensor used by a network has a dynamic range set.
//
// All tensors in a network must have a dynamic range specified if a calibrator
// is not used.
// This function is just a utility to globally fill in missing scales and
// zero-points for the entire network.
//
// If a tensor does not have a dynamic range set, it is assigned inRange or
// outRange as follows:
//
// * If the tensor is the input to a layer or output of a pooling node, its
// dynamic range is derived from inRange.
// * Otherwise its dynamic range is derived from outRange.
//
// The default parameter values are intended to demonstrate, for final layers
// in the network, cases where dynamic ranges are asymmetric.
//
// The default parameter values were chosen arbitrarily. Range values should be
// chosen such that we avoid underflow or overflow. Also, range values should
// be non-zero to avoid a uniform zero-scale tensor.
inline void setAllDynamicRanges(INetworkDefinition* network,
float inRange = 2.0f, float outRange = 4.0f) {
// Ensure that all layer inputs have a scale.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbInputs(); j++) {
ITensor* input{layer->getInput(j)};
// Optional inputs are nullptr here and are from RNN layers.
if (input != nullptr && !input->dynamicRangeIsSet()) {
ASSERT(input->setDynamicRange(-inRange, inRange));
}
}
}
// Ensure that all layer outputs have a scale.
// Tensors that are also inputs to layers are ignored here
// since the previous loop nest assigned scales to them.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbOutputs(); j++) {
ITensor* output{layer->getOutput(j)};
// Optional outputs are nullptr here and are from RNN layers.
if (output != nullptr && !output->dynamicRangeIsSet()) {
// Pooling must have the same input and output scales.
if (layer->getType() == LayerType::kPOOLING) {
ASSERT(output->setDynamicRange(-inRange, inRange));
} else {
ASSERT(output->setDynamicRange(-outRange, outRange));
}
}
}
}
}
inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
INetworkDefinition* n) {
// Set dummy per-tensor dynamic range if Int8 mode is requested.
if (c->getFlag(BuilderFlag::kINT8)) {
sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
"per-tensor dynamic range. Int8 accuracy is not "
"guaranteed."
<< std::endl;
setAllDynamicRanges(n);
}
}
inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
bool allowGPUFallback = true) {
if (useDLACore >= 0) {
if (builder->getNbDLACores() == 0) {
std::cerr << "Trying to use DLA core " << useDLACore
<< " on a platform that doesn't have any DLA cores"
<< std::endl;
assert(
"Error: use DLA core on a platfrom that doesn't have any DLA cores" &&
false);
}
if (allowGPUFallback) {
config->setFlag(BuilderFlag::kGPU_FALLBACK);
}
if (!config->getFlag(BuilderFlag::kINT8)) {
// User has not requested INT8 Mode.
// By default run in FP16 mode. FP32 mode is not permitted.
config->setFlag(BuilderFlag::kFP16);
}
config->setDefaultDeviceType(DeviceType::kDLA);
config->setDLACore(useDLACore);
}
}
inline int32_t parseDLA(int32_t argc, char** argv) {
for (int32_t i = 1; i < argc; i++) {
if (strncmp(argv[i], "--useDLACore=", 13) == 0) {
return std::stoi(argv[i] + 13);
}
}
return -1;
}
inline uint32_t getElementSize(nvinfer1::DataType t) noexcept {
switch (t) {
case nvinfer1::DataType::kINT32:
return 4;
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
inline int64_t volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, // int64_t init avoids 32-bit overflow
std::multiplies<int64_t>());
}
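// Example: volume(Dims{3, {2, 3, 4}}) == 24 elements; with kFLOAT that is
// 24 * getElementSize(kFLOAT) == 96 bytes.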
template <int C, int H, int W> struct PPM {
std::string magic, fileName;
int h, w, max;
uint8_t buffer[C * H * W];
};
// vPPM (variable-sized PPM) struct with runtime-sized dimensions.
struct vPPM {
std::string magic, fileName;
int h, w, max;
std::vector<uint8_t> buffer;
};
struct BBox {
float x1, y1, x2, y2;
};
template <int C, int H, int W>
void readPPMFile(const std::string& filename,
samplesCommon::PPM<C, H, W>& ppm) {
ppm.fileName = filename;
std::ifstream infile(filename, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void readPPMFile(const std::string& filename, vPPM& ppm,
std::vector<std::string>& input_dir) {
ppm.fileName = filename;
std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
for (int i = 0; i < ppm.w * ppm.h * 3; ++i) {
ppm.buffer.push_back(0);
}
infile.read(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
template <int C, int H, int W>
void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm,
const BBox& bbox) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
const int x1 = std::min(std::max(0, round(bbox.x1)), W - 1);
const int x2 = std::min(std::max(0, round(bbox.x2)), W - 1);
const int y1 = std::min(std::max(0, round(bbox.y1)), H - 1);
const int y2 = std::min(std::max(0, round(bbox.y2)), H - 1);
for (int x = x1; x <= x2; ++x) {
// bbox top border
ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
}
for (int y = y1; y <= y2; ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + x1) * 3] = 255;
ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + x2) * 3] = 255;
ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
}
outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm,
std::vector<BBox>& dets) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
for (auto bbox : dets) {
for (int x = int(bbox.x1); x < int(bbox.x2); ++x) {
// bbox top border
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0;
}
for (int y = int(bbox.y1); y < int(bbox.y2); ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0;
}
}
outfile.write(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
class TimerBase {
public:
virtual void start() {}
virtual void stop() {}
float microseconds() const noexcept { return mMs * 1000.f; }
float milliseconds() const noexcept { return mMs; }
float seconds() const noexcept { return mMs / 1000.f; }
void reset() noexcept { mMs = 0.f; }
protected:
float mMs{0.0f};
};
class GpuTimer : public TimerBase {
public:
explicit GpuTimer(cudaStream_t stream) : mStream(stream) {
CHECK(cudaEventCreate(&mStart));
CHECK(cudaEventCreate(&mStop));
}
~GpuTimer() {
CHECK(cudaEventDestroy(mStart));
CHECK(cudaEventDestroy(mStop));
}
void start() { CHECK(cudaEventRecord(mStart, mStream)); }
void stop() {
CHECK(cudaEventRecord(mStop, mStream));
float ms{0.0f};
CHECK(cudaEventSynchronize(mStop));
CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
mMs += ms;
}
private:
cudaEvent_t mStart, mStop;
cudaStream_t mStream;
}; // class GpuTimer
template <typename Clock> class CpuTimer : public TimerBase {
public:
using clock_type = Clock;
void start() { mStart = Clock::now(); }
void stop() {
mStop = Clock::now();
mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
}
private:
std::chrono::time_point<Clock> mStart, mStop;
}; // class CpuTimer
using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
inline std::vector<std::string> splitString(std::string str,
char delimiter = ',') {
std::vector<std::string> splitVect;
std::stringstream ss(str);
std::string substr;
while (ss.good()) {
getline(ss, substr, delimiter);
splitVect.emplace_back(std::move(substr));
}
return splitVect;
}
// Return m rounded up to nearest multiple of n
inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; }
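// Example: roundUp(13, 8) == 16.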
inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; }
inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; }
inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; }
inline void loadLibrary(const std::string& path) {
#ifdef _MSC_VER
void* handle = LoadLibrary(path.c_str());
#else
int32_t flags{RTLD_LAZY};
#if ENABLE_ASAN
// https://github.com/google/sanitizers/issues/89
// asan doesn't handle module unloading correctly and there are no plans on
// doing
// so. In order to get proper stack traces, don't delete the shared library on
// close so that asan can resolve the symbols correctly.
flags |= RTLD_NODELETE;
#endif // ENABLE_ASAN
void* handle = dlopen(path.c_str(), flags);
#endif
if (handle == nullptr) {
#ifdef _MSC_VER
sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
sample::gLogError << "Could not load plugin library: " << path
<< ", due to: " << dlerror() << std::endl;
#endif
}
}
inline int32_t getSMVersion() {
int32_t deviceIndex = 0;
CHECK(cudaGetDevice(&deviceIndex));
int32_t major, minor;
CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
deviceIndex));
CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
deviceIndex));
return ((major << 8) | minor);
}
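// Example (illustrative sketch, not part of the original header): decoding
// the value packed by getSMVersion(). On an SM 8.0 device this prints "8.0".
//
// const int32_t sm = getSMVersion();
// std::cout << (sm >> 8) << "." << (sm & 0xFF) << std::endl;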
inline bool isSMSafe() {
const int32_t smVersion = getSMVersion();
return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 ||
smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807;
}
inline bool isDataTypeSupported(DataType dataType) {
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder) {
return false;
}
if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) ||
(dataType == DataType::kHALF && !builder->platformHasFastFp16())) {
return false;
}
return true;
}
} // namespace samplesCommon
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
os << "(";
for (int i = 0; i < dims.nbDims; ++i) {
os << (i ? ", " : "") << dims.d[i];
}
return os << ")";
}
#endif // TENSORRT_COMMON_H


@@ -1,223 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "getOptions.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <set>
namespace nvinfer1 {
namespace utility {
//! Matching for TRTOptions is defined as follows:
//!
//! If A and B both have longName set, A matches B if and only if A.longName ==
//! B.longName and (A.shortName == B.shortName if both have short name set).
//!
//! If A only has shortName set and B only has longName set, then A does not
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
//! the definition of a TRTOption in the input to getOptions. As such, if the
//! definition only has shortName set, it will never be equal to a TRTOption
//! that does not have shortName set (and same for longName).
//!
//! If A and B both have shortName set but B does not have longName set, A
//! matches B if and only if A.shortName == B.shortName.
//!
//! If A has neither long or short name set, A matches B if and only if B has
//! neither long or short name set.
bool matches(const TRTOption& a, const TRTOption& b) {
if (!a.longName.empty() && !b.longName.empty()) {
if (a.shortName && b.shortName) {
return (a.longName == b.longName) && (a.shortName == b.shortName);
}
return a.longName == b.longName;
}
// If exactly one of them has shortName set, the comparison below is false
// anyway.
return a.shortName == b.shortName;
}
//! getTRTOptionIndex returns the index of a TRTOption in a vector of
//! TRTOptions, -1 if not found.
int getTRTOptionIndex(const std::vector<TRTOption>& options,
const TRTOption& opt) {
for (size_t i = 0; i < options.size(); ++i) {
if (matches(opt, options[i])) {
return static_cast<int>(i);
}
}
return -1;
}
//! validateTRTOption will return a string containing an error message if the
//! option's names contain non-alphanumeric characters, or if there are
//! duplicate option names found. Otherwise, returns the empty string.
std::string validateTRTOption(const std::set<char>& seenShortNames,
const std::set<std::string>& seenLongNames,
const TRTOption& opt) {
if (opt.shortName != 0) {
if (!std::isalnum(opt.shortName)) {
// Note: std::to_string(char) would print the character's numeric code,
// so build a one-character string instead.
return "Short name '" + std::string(1, opt.shortName) +
"' is non-alphanumeric";
}
if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
return "Short name '" + std::string(1, opt.shortName) +
"' is a duplicate";
}
}
if (!opt.longName.empty()) {
for (const char& c : opt.longName) {
if (!std::isalnum(c) && c != '-' && c != '_') {
return "Long name '" + opt.longName +
"' contains characters that are not '-', '_', or alphanumeric";
}
}
if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
return "Long name '" + opt.longName + "' is a duplicate";
}
}
return "";
}
//! validateTRTOptions will return a string containing an error message if any
//! options contain non-alphanumeric characters, or if there are duplicate
//! option names found. Otherwise, returns the empty string.
std::string validateTRTOptions(const std::vector<TRTOption>& options) {
std::set<char> seenShortNames;
std::set<std::string> seenLongNames;
for (size_t i = 0; i < options.size(); ++i) {
const std::string errMsg =
validateTRTOption(seenShortNames, seenLongNames, options[i]);
if (!errMsg.empty()) {
return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
}
seenShortNames.insert(options[i].shortName);
seenLongNames.insert(options[i].longName);
}
return "";
}
//! parseArgs parses an argument list and returns a TRTParsedArgs with the
//! fields set accordingly. Assumes that options is validated.
//! ErrMsg will be set if:
//! - an argument is null
//! - an argument is empty
//! - an argument does not have option (i.e. "-" and "--")
//! - a short argument has more than 1 character
//! - the last argument in the list requires a value
TRTParsedArgs parseArgs(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
TRTParsedArgs parsedArgs;
parsedArgs.values.resize(options.size());
for (int i = 1; i < argc; ++i) // index of current command-line argument
{
if (argv[i] == nullptr) {
return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
}
const std::string argStr(argv[i]);
if (argStr.empty()) {
return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
}
// No starting hyphen means it is a positional argument
if (argStr[0] != '-') {
parsedArgs.positionalArgs.push_back(argStr);
continue;
}
if (argStr == "-" || argStr == "--") {
return TRTParsedArgs{"Argument does not specify an option at index " +
std::to_string(i)};
}
// If only 1 hyphen, char after is the flag.
TRTOption opt{' ', "", false, ""};
std::string value;
if (argStr[1] != '-') {
// Must only have 1 char after the hyphen
if (argStr.size() > 2) {
return TRTParsedArgs{
"Short arg contains more than 1 character at index " +
std::to_string(i)};
}
opt.shortName = argStr[1];
} else {
opt.longName = argStr.substr(2);
// We need to support --foo=bar syntax, so look for '='
const size_t eqIndex = opt.longName.find('=');
if (eqIndex < opt.longName.size()) {
value = opt.longName.substr(eqIndex + 1);
opt.longName = opt.longName.substr(0, eqIndex);
}
}
const int idx = getTRTOptionIndex(options, opt);
if (idx < 0) {
continue;
}
if (options[idx].valueRequired) {
if (!value.empty()) {
parsedArgs.values[idx].second.push_back(value);
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
continue;
}
if (i + 1 >= argc) {
return TRTParsedArgs{"Last argument requires value, but none given"};
}
const std::string nextArg(argv[i + 1]);
if (nextArg.size() >= 1 && nextArg[0] == '-') {
sample::gLogWarning << "Warning: Using '" << nextArg
<< "' as a value for '" << argStr
<< "', Should this be its own flag?" << std::endl;
}
parsedArgs.values[idx].second.push_back(nextArg);
i += 1; // Next argument already consumed
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
} else {
parsedArgs.values[idx].first += 1;
}
}
return parsedArgs;
}
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
const std::string errMsg = validateTRTOptions(options);
if (!errMsg.empty()) {
return TRTParsedArgs{errMsg};
}
return parseArgs(argc, argv, options);
}
} // namespace utility
} // namespace nvinfer1


@@ -1,128 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_GET_OPTIONS_H
#define TRT_GET_OPTIONS_H
#include <string>
#include <utility>
#include <vector>
namespace nvinfer1 {
namespace utility {
//! TRTOption defines a command line option. At least one of shortName and
//! longName must be defined.
//! valueRequired must be explicitly initialized, since an uninitialized bool
//! member would otherwise have an indeterminate value.
//! helpText is optional.
struct TRTOption {
char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b)
std::string longName; //!< Option name in long (double hyphen) form (i.e.
//!--foo, --bar)
bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4,
//!--foo bar)
std::string helpText; //!< Text to show when printing out the command usage
};
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
//! argument list (argv).
//!
//! errMsg is a string containing an error message if any errors occurred. If it
//! is empty, no errors occurred.
//!
//! values stores a vector of pairs for each option (ordered by order in the
//! input). Each pair contains an int (the number of occurrences) and a vector
//! of strings (a list of values). The user should know which of these to use,
//! and which options required values. For non-value options, only the
//! occurrence count is populated. For value-required options, occurrences ==
//! # of values. Values do not need to be unique.
//!
//! positionalArgs stores additional arguments that are passed in without an
//! option (these must not start with a hyphen).
struct TRTParsedArgs {
std::string errMsg;
std::vector<std::pair<int, std::vector<std::string>>> values;
std::vector<std::string> positionalArgs;
};
//! Parse the input arguments passed to main() and extract options as well as
//! positional arguments.
//!
//! Options are supposed to be passed to main() with a preceding hyphen '-'.
//!
//! If there is a single preceding hyphen, there should be exactly 1 character
//! after the hyphen, which is interpreted as the option.
//!
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
//! is interpreted as the option.
//!
//! If the option requires a value, the next argument is used as the value.
//!
//! Positional arguments must not start with a hyphen.
//!
//! If an argument requires a value, the next argument is interpreted as the
//! value, even if it is in the form of a valid option (i.e. --foo --bar will
//! store "--bar" as a value for option "foo" if "foo" requires a value).
//! We also support --name=value syntax. In this case, 'value' would be used as
//! the value, NOT the next argument.
//!
//! For options:
//! { { 'a', "", false },
//! { 'b', "", false },
//! { 0, "cee", false },
//! { 'd', "", true },
//! { 'e', "", true },
//! { 'f', "foo", true } }
//!
//! ./main hello world -a -a --cee -d 12 -f 34
//! and
//! ./main hello world -a -a --cee -d 12 --foo 34
//!
//! will result in:
//!
//! TRTParsedArgs {
//! errMsg: "",
//! values: { { 2, {} },
//! { 0, {} },
//! { 1, {} },
//! { 1, {"12"} },
//! { 0, {} },
//! { 1, {"34"} } }
//! positionalArgs: {"hello", "world"},
//! }
//!
//! Non-POSIX behavior:
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
//! option must have its own hyphen prefix.
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
//! whitespace-separated from the option they belong to.
//!
//! @param[in] argc The number of arguments passed to main (including the
//! file name, which is disregarded)
//! @param[in] argv The arguments passed to main (including the file name,
//! which is disregarded)
//! @param[in] options List of TRTOptions to parse
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
//! the fields.
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options);
} // namespace utility
} // namespace nvinfer1
#endif // TRT_GET_OPTIONS_H
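// Example usage (illustrative sketch, not part of the original header): a
// minimal main() driving getOptions(). The option names and the error
// handling are placeholders.
//
// int main(int argc, char** argv) {
//   using nvinfer1::utility::TRTOption;
//   using nvinfer1::utility::TRTParsedArgs;
//   const std::vector<TRTOption> options{
//       {'v', "verbose", false, "enable verbose output"},
//       {'b', "batch", true, "batch size"}};
//   TRTParsedArgs args = nvinfer1::utility::getOptions(argc, argv, options);
//   if (!args.errMsg.empty()) { /* report args.errMsg and exit */ }
//   const bool verbose = args.values[0].first > 0;
//   const std::string batch =
//       args.values[1].second.empty() ? "1" : args.values[1].second.back();
//   return 0;
// }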

File diff suppressed because it is too large


@@ -1,38 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "logger.h"
#include "ErrorRecorder.h"
#include "logging.h"
SampleErrorRecorder gRecorder;
namespace sample {
Logger gLogger{Logger::Severity::kINFO};
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
void setReportableSeverity(Logger::Severity severity) {
gLogger.setReportableSeverity(severity);
gLogVerbose.setReportableSeverity(severity);
gLogInfo.setReportableSeverity(severity);
gLogWarning.setReportableSeverity(severity);
gLogError.setReportableSeverity(severity);
gLogFatal.setReportableSeverity(severity);
}
} // namespace sample


@@ -1,35 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include "logging.h"
class SampleErrorRecorder;
extern SampleErrorRecorder gRecorder;
namespace sample {
extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;
void setReportableSeverity(Logger::Severity severity);
} // namespace sample
#endif // LOGGER_H


@@ -1,573 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include "sampleOptions.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ostream>
#include <sstream>
#include <string>
namespace sample {
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf {
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
: mOutput(other.mOutput), mPrefix(other.mPrefix),
mShouldLog(other.mShouldLog) {}
LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
LogStreamConsumerBuffer() = delete;
LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
~LogStreamConsumerBuffer() override {
// std::streambuf::pbase() gives a pointer to the beginning of the buffered
// part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the
// output sequence
// if the pointer to the beginning is not equal to the pointer to the
// current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr()) {
putOutput();
}
}
//!
//! synchronizes the stream buffer and returns 0 on success
//! synchronizing the stream buffer consists of inserting the buffer contents
//! into the stream,
//! resetting the buffer and flushing the stream
//!
int32_t sync() override {
putOutput();
return 0;
}
void putOutput() {
if (mShouldLog) {
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
mOutput << "[";
mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
<< "/";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
<< "-";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents, prefixed appropriately, into the stream
mOutput << mPrefix << str();
}
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog{};
}; // class LogStreamConsumerBuffer
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
//! std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase {
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mBuffer(stream, prefix, shouldLog) {}
protected:
std::mutex mLogMutex;
LogStreamConsumerBuffer mBuffer;
}; // class LogStreamConsumerBase
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when
//! logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the
//! LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to
//! std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from
//! being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
public:
//!
//! \brief Creates a LogStreamConsumer which logs messages with level
//! severity.
//! Reportable severity determines if the messages are severe enough to be
//! logged.
//!
LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
nvinfer1::ILogger::Severity severity)
: LogStreamConsumerBase(severityOstream(severity),
severityPrefix(severity),
severity <= reportableSeverity),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
LogStreamConsumer(LogStreamConsumer&& other) noexcept
: LogStreamConsumerBase(severityOstream(other.mSeverity),
severityPrefix(other.mSeverity),
other.mShouldLog),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
LogStreamConsumer(const LogStreamConsumer& other) = delete;
LogStreamConsumer() = delete;
~LogStreamConsumer() = default;
LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
void setReportableSeverity(Severity reportableSeverity) {
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
std::mutex& getMutex() { return mLogMutex; }
bool getShouldLog() const { return mShouldLog; }
private:
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
bool mShouldLog;
Severity mSeverity;
}; // class LogStreamConsumer
template <typename T>
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << obj;
}
return logger;
}
//!
//! Special handling std::endl
//!
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
std::ostream& (*f)(std::ostream&)) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << f;
}
return logger;
}
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
const nvinfer1::Dims& dims) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
}
return logger;
}
//!
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and
//! samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or
//! internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to
//! emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output
//! is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results
//! to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
//! duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits
//! directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between
//! messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to
//! access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger
//! implementation a member of the Logger
//! object.
//!
class Logger : public nvinfer1::ILogger {
public:
explicit Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity) {}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult {
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger
//! associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the
//! logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will
//! eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) noexcept override {
LogStreamConsumer(mReportableSeverity, severity)
<< "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of
//! this level or higher.
//!
void setReportableSeverity(Severity severity) noexcept {
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print
//! test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom
//! that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom {
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started), mName(name), mCmdline(cmdline) {}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting
//! with
//! "TensorRT" and containing dot-separated strings
//! containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name,
const std::string& cmdline) {
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an
//! array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, int32_t argc,
char const* const* argv) {
// Append TensorRT version as info
const std::string vname =
name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
auto cmdline = genCmdlineString(argc, argv);
return defineTest(vname, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom) {
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of
//! TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(TestAtom const& testAtom, TestResult result) {
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int32_t reportPass(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int32_t reportFail(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int32_t reportWaive(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int32_t reportTest(TestAtom const& testAtom, bool pass) {
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const { return mReportableSeverity; }
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the
//! given severity
//!
static const char* severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message
//! with the given result
//!
static const char* testResultString(TestResult result) {
switch (result) {
case TestResult::kRUNNING:
return "RUNNING";
case TestResult::kPASSED:
return "PASSED";
case TestResult::kFAILED:
return "FAILED";
case TestResult::kWAIVED:
return "WAIVED";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the
//! given severity
//!
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(TestAtom const& testAtom, TestResult result) {
severityOstream(Severity::kINFO)
<< "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int32_t argc, char const* const* argv) {
std::stringstream ss;
for (int32_t i = 0; i < argc; i++) {
if (i > 0) {
ss << " ";
}
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
}; // class Logger
namespace {
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINTERNAL_ERROR
//! ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(),
Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
} // namespace sample
#endif // TENSORRT_LOGGING_H
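// Example usage (illustrative sketch, not part of the original header):
// driving the Logger and the test-report helpers from a sample's main(). The
// test name and the work performed are placeholders.
//
// int main(int argc, char** argv) {
//   sample::Logger logger{sample::Logger::Severity::kINFO};
//   auto test =
//       sample::Logger::defineTest("TensorRT.sample_example", argc, argv);
//   sample::Logger::reportTestStart(test);
//   sample::LOG_INFO(logger) << "building engine" << std::endl;
//   const bool ok = true; // placeholder for the sample's real work
//   return sample::Logger::reportTest(test, ok);
// }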


@@ -1,126 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PARSER_ONNX_CONFIG_H
#define PARSER_ONNX_CONFIG_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
#include "NvOnnxParser.h"
#define ONNX_DEBUG 1
/**
* \class ParserOnnxConfig
* \brief Configuration Manager Class Concrete Implementation
*
* \note:
*
*/
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
protected:
std::string mModelFilename{};
std::string mTextFilename{};
std::string mFullTextFilename{};
nvinfer1::DataType mModelDtype;
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
bool mPrintLayercInfo;
public:
ParserOnnxConfig()
: mModelDtype(nvinfer1::DataType::kFLOAT),
mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
mPrintLayercInfo(false) {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~ParserOnnxConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
mModelDtype = modelDtype;
}
virtual nvinfer1::DataType getModelDtype() const noexcept {
return mModelDtype;
}
virtual const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
virtual void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = std::string(onnxFilename);
}
virtual nvonnxparser::IOnnxConfig::Verbosity
getVerbosityLevel() const noexcept {
return mVerbosity;
}
virtual void addVerbosity() noexcept { ++mVerbosity; }
virtual void reduceVerbosity() noexcept { --mVerbosity; }
virtual void
setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
mVerbosity = verbosity;
}
virtual const char* getTextFileName() const noexcept {
return mTextFilename.c_str();
}
virtual void setTextFileName(const char* textFilename) noexcept {
mTextFilename = std::string(textFilename);
}
virtual const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = std::string(fullTextFilename);
}
virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
virtual void setPrintLayerInfo(bool src) noexcept {
mPrintLayercInfo = src;
} //!< set the boolean variable corresponding to the layer info, see
//! getPrintLayerInfo()
virtual bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
virtual void destroy() noexcept { delete this; }
}; // class ParserOnnxConfig
#endif


@@ -1,65 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_SAFE_COMMON_H
#define TENSORRT_SAFE_COMMON_H
#include "NvInferRuntimeCommon.h"
#include <cstdlib>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#define CHECK(status) \
do { \
auto ret = (status); \
if (ret != 0) { \
std::cerr << "Cuda failure: " << ret << std::endl; \
abort(); \
} \
} while (0)
namespace samplesCommon {
template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
if (!obj) {
throw std::runtime_error("Failed to create object");
}
return std::shared_ptr<T>(obj);
}
inline uint32_t elementSize(nvinfer1::DataType t) {
switch (t) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kINT8:
return 1;
case nvinfer1::DataType::kBOOL:
return 1;
}
return 0;
}
template <typename A, typename B> inline A divUp(A x, B n) {
return (x + n - 1) / n;
}
} // namespace samplesCommon
#endif // TENSORRT_SAFE_COMMON_H
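// Example (illustrative sketch, not part of the original header): computing
// a buffer size with the helpers above. 1000 kFLOAT elements copied in
// 256-byte chunks need divUp(4000, 256) == 16 chunks.
//
// const uint32_t bytes =
//     1000 * samplesCommon::elementSize(nvinfer1::DataType::kFLOAT); // 4000
// const uint32_t chunks = samplesCommon::divUp(bytes, 256u);         // 16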


@@ -1,251 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SampleConfig_H
#define SampleConfig_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
class SampleConfig : public nvonnxparser::IOnnxConfig {
public:
enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
private:
std::string mModelFilename;
std::string mEngineFilename;
std::string mTextFilename;
std::string mFullTextFilename;
std::string mImageFilename;
std::string mReferenceFilename;
std::string mOutputFilename;
std::string mCalibrationFilename;
std::string mTimingCacheFilename;
int64_t mLabel{-1};
int64_t mMaxBatchSize{32};
int64_t mCalibBatchSize{0};
int64_t mMaxNCalibBatch{0};
int64_t mFirstCalibBatch{0};
int64_t mUseDLACore{-1};
nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
bool mTF32{true};
Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
bool mPrintLayercInfo{false};
bool mDebugBuilder{false};
InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
uint64_t mTopK{0};
float mFailurePercentage{-1.0f};
float mTolerance{0.0f};
float mAbsTolerance{1e-5f};
public:
SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "SampleConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
void setModelDtype(const nvinfer1::DataType mdt) noexcept {
mModelDtype = mdt;
}
nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
bool getTF32() const noexcept { return mTF32; }
void setTF32(bool enabled) noexcept { mTF32 = enabled; }
const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = std::string(onnxFilename);
}
Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
void addVerbosity() noexcept { ++mVerbosity; }
void reduceVerbosity() noexcept { --mVerbosity; }
virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
const char* getEngineFileName() const noexcept {
return mEngineFilename.c_str();
}
void setEngineFileName(const char* engineFilename) noexcept {
mEngineFilename = std::string(engineFilename);
}
const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
void setTextFileName(const char* textFilename) noexcept {
mTextFilename = std::string(textFilename);
}
const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = std::string(fullTextFilename);
}
void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label
int64_t getLabel() const noexcept { return mLabel; } //!< get the Label
bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
void setPrintLayerInfo(bool b) noexcept {
mPrintLayercInfo = b;
} //!< set the boolean variable corresponding to the layer info, see
//! getPrintLayerInfo()
void setMaxBatchSize(int64_t maxBatchSize) noexcept {
mMaxBatchSize = maxBatchSize;
} //!< set the Max Batch Size
int64_t getMaxBatchSize() const noexcept {
return mMaxBatchSize;
} //!< get the Max Batch Size
void setCalibBatchSize(int64_t CalibBatchSize) noexcept {
mCalibBatchSize = CalibBatchSize;
} //!< set the calibration batch size
int64_t getCalibBatchSize() const noexcept {
return mCalibBatchSize;
} //!< get calibration batch size
void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept {
mMaxNCalibBatch = MaxNCalibBatch;
} //!< set Max Number of Calibration Batches
int64_t getMaxNCalibBatch() const noexcept {
return mMaxNCalibBatch;
} //!< get the Max Number of Calibration Batches
void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept {
mFirstCalibBatch = FirstCalibBatch;
} //!< set the first calibration batch
int64_t getFirstCalibBatch() const noexcept {
return mFirstCalibBatch;
} //!< get the first calibration batch
void setUseDLACore(int64_t UseDLACore) noexcept {
mUseDLACore = UseDLACore;
} //!< set the DLA core to use
int64_t getUseDLACore() const noexcept {
return mUseDLACore;
} //!< get the DLA core to use
void setDebugBuilder() noexcept {
mDebugBuilder = true;
} //!< enable the Debug info, while building the engine.
bool getDebugBuilder() const noexcept {
return mDebugBuilder;
} //!< get the boolean variable, corresponding to the debug builder
const char*
getImageFileName() const noexcept //!< get the image file name (PPM or ASCII)
{
return mImageFilename.c_str();
}
void setImageFileName(
const char* imageFilename) noexcept //!< set the image file name
{
mImageFilename = std::string(imageFilename);
}
const char* getReferenceFileName() const noexcept {
return mReferenceFilename.c_str();
}
void setReferenceFileName(
const char* referenceFilename) noexcept //!< set reference file name
{
mReferenceFilename = std::string(referenceFilename);
}
void setInputDataFormat(InputDataFormat idt) noexcept {
mInputDataFormat = idt;
} //!< specifies expected data format of the image file (PPM or ASCII)
InputDataFormat getInputDataFormat() const noexcept {
return mInputDataFormat;
} //!< returns the expected data format of the image file.
const char* getOutputFileName()
const noexcept //!< get the file name used to save the results
{
return mOutputFilename.c_str();
}
void setOutputFileName(
const char* outputFilename) noexcept //!< set the output file name
{
mOutputFilename = std::string(outputFilename);
}
const char* getCalibrationFileName() const noexcept {
return mCalibrationFilename.c_str();
} //!< specifies the file containing the list of image files for int8
//! calibration
void setCalibrationFileName(
const char* calibrationFilename) noexcept //!< set the int8 calibration
//! list file name
{
mCalibrationFilename = std::string(calibrationFilename);
}
uint64_t getTopK() const noexcept { return mTopK; }
void setTopK(uint64_t topK) noexcept {
mTopK = topK;
} //!< If this option is specified, return the K top probabilities.
float getFailurePercentage() const noexcept { return mFailurePercentage; }
void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
float getTolerance() const noexcept { return mTolerance; }
void setTolerance(float t) noexcept { mTolerance = t; }
const char* getTimingCacheFilename() const noexcept {
return mTimingCacheFilename.c_str();
}
void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
mTimingCacheFilename = std::string(timingCacheFilename);
}
bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
void destroy() noexcept { delete this; }
}; // class SampleConfig
#endif


@@ -1,397 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_DEVICE_H
#define TRT_SAMPLE_DEVICE_H
#include <cassert>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <thread>
namespace sample {
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
if (ret != cudaSuccess) {
err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
abort();
}
}
class TrtCudaEvent;
namespace {
void cudaSleep(void* sleep) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
}
} // namespace
//!
//! \class TrtCudaStream
//! \brief Managed CUDA stream
//!
class TrtCudaStream {
public:
TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
TrtCudaStream(const TrtCudaStream&) = delete;
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
TrtCudaStream(TrtCudaStream&&) = delete;
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
cudaStream_t get() const { return mStream; }
void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
void wait(TrtCudaEvent& event);
void sleep(float* ms) {
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
}
private:
cudaStream_t mStream{};
};
//!
//! \class TrtCudaEvent
//! \brief Managed CUDA event
//!
class TrtCudaEvent {
public:
explicit TrtCudaEvent(bool blocking = true) {
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
}
TrtCudaEvent(const TrtCudaEvent&) = delete;
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
TrtCudaEvent(TrtCudaEvent&&) = delete;
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); }
cudaEvent_t get() const { return mEvent; }
void record(const TrtCudaStream& stream) {
cudaCheck(cudaEventRecord(mEvent, stream.get()));
}
void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); }
// Returns the elapsed time between the two events, in milliseconds
float operator-(const TrtCudaEvent& e) const {
float time{0};
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
return time;
}
private:
cudaEvent_t mEvent{};
};
inline void TrtCudaStream::wait(TrtCudaEvent& event) {
cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
}
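// Example (illustrative sketch, not part of the original header): timing a
// span of stream work with two events and the operator- defined above.
// enqueueWork() is a hypothetical placeholder for asynchronous work.
//
// TrtCudaStream stream;
// TrtCudaEvent begin, end;
// begin.record(stream);
// enqueueWork(stream.get()); // placeholder: asynchronous work on the stream
// end.record(stream);
// end.synchronize();         // both events must have completed
// const float ms = end - begin; // elapsed milliseconds between the events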
//!
//! \class TrtCudaGraph
//! \brief Managed CUDA graph
//!
class TrtCudaGraph {
public:
explicit TrtCudaGraph() = default;
TrtCudaGraph(const TrtCudaGraph&) = delete;
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
TrtCudaGraph(TrtCudaGraph&&) = delete;
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
~TrtCudaGraph() {
if (mGraphExec) {
cudaGraphExecDestroy(mGraphExec);
}
}
void beginCapture(TrtCudaStream& stream) {
cudaCheck(
cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
}
bool launch(TrtCudaStream& stream) {
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
}
void endCapture(TrtCudaStream& stream) {
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
cudaCheck(cudaGraphDestroy(mGraph));
}
void endCaptureOnError(TrtCudaStream& stream) {
// There are two possibilities why stream capture would fail:
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
// (2) TRT reports a failure.
// In case (1), the returning mGraph should be nullptr.
// In case (2), the returning mGraph is not nullptr, but it should not be
// used.
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
if (ret == cudaErrorStreamCaptureInvalidated) {
assert(mGraph == nullptr);
} else {
assert(ret == cudaSuccess);
assert(mGraph != nullptr);
cudaCheck(cudaGraphDestroy(mGraph));
mGraph = nullptr;
}
// Clean up any CUDA error.
cudaGetLastError();
sample::gLogWarning << "The CUDA graph capture on the stream has failed."
<< std::endl;
}
private:
cudaGraph_t mGraph{};
cudaGraphExec_t mGraphExec{};
};
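// Example (illustrative sketch, not part of the original header): capturing
// enqueued work into a CUDA graph and replaying it. enqueueWork() is a
// placeholder; real callers handle capture failure via endCaptureOnError().
//
// TrtCudaGraph graph;
// graph.beginCapture(stream);
// enqueueWork(stream.get()); // work is recorded, not executed, while capturing
// graph.endCapture(stream);  // instantiates the executable graph
// for (int i = 0; i < 10; ++i) {
//   graph.launch(stream);    // replays the captured work
// }
// stream.synchronize();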
//!
//! \class TrtCudaBuffer
//! \brief Managed buffer for host and device
//!
template <typename A, typename D> class TrtCudaBuffer {
public:
TrtCudaBuffer() = default;
TrtCudaBuffer(const TrtCudaBuffer&) = delete;
TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
TrtCudaBuffer(TrtCudaBuffer&& rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
if (this != &rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
return *this;
}
~TrtCudaBuffer() { reset(); }
TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
void allocate(size_t size) {
reset();
A()(&mPtr, size);
}
void reset(void* ptr = nullptr) {
if (mPtr) {
D()(mPtr);
}
mPtr = ptr;
}
void* get() const { return mPtr; }
private:
void* mPtr{nullptr};
};
struct DeviceAllocator {
void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
};
struct DeviceDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
};
struct ManagedAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocManaged(ptr, size));
}
};
struct HostAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocHost(ptr, size));
}
};
struct HostDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
};
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
//!
//! \class MirroredBuffer
//! \brief Coupled host and device buffers
//!
class IMirroredBuffer {
public:
//!
//! Allocate memory for the mirrored buffer given the size
//! of the allocation.
//!
virtual void allocate(size_t size) = 0;
//!
//! Get the pointer to the device side buffer.
//!
//! \return pointer to device memory or nullptr if uninitialized.
//!
virtual void* getDeviceBuffer() const = 0;
//!
//! Get the pointer to the host side buffer.
//!
//! \return pointer to host memory or nullptr if uninitialized.
//!
virtual void* getHostBuffer() const = 0;
//!
//! Copy the memory from host to device.
//!
virtual void hostToDevice(TrtCudaStream& stream) = 0;
//!
//! Copy the memory from device to host.
//!
virtual void deviceToHost(TrtCudaStream& stream) = 0;
//!
//! Interface to get the size of the memory
//!
//! \return the size of memory allocated.
//!
virtual size_t getSize() const = 0;
//!
//! Virtual destructor declaration
//!
virtual ~IMirroredBuffer() = default;
}; // class IMirroredBuffer
//!
//! Class to have a separate memory buffer for discrete device and host
//! allocations.
//!
class DiscreteMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mHostBuffer.allocate(size);
mDeviceBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
void* getHostBuffer() const { return mHostBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
cudaMemcpyHostToDevice, stream.get()));
}
void deviceToHost(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
cudaMemcpyDeviceToHost, stream.get()));
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtHostBuffer mHostBuffer;
TrtDeviceBuffer mDeviceBuffer;
}; // class DiscreteMirroredBuffer
//!
//! Class to have a unified memory buffer for embedded devices.
//!
class UnifiedMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mBuffer.get(); }
void* getHostBuffer() const { return mBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
void deviceToHost(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtManagedBuffer mBuffer;
}; // class UnifiedMirroredBuffer
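// Example (illustrative sketch, not part of the original header): filling a
// discrete mirrored buffer on the host and mirroring it to the device.
// Assumes <cstring> for std::memset.
//
// DiscreteMirroredBuffer buf;
// buf.allocate(1024);
// std::memset(buf.getHostBuffer(), 0, buf.getSize());
// TrtCudaStream stream;
// buf.hostToDevice(stream);
// stream.synchronize(); // the device copy is complete after this point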
inline void setCudaDevice(int device, std::ostream& os) {
cudaCheck(cudaSetDevice(device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// clang-format off
os << "=== Device Information ===" << std::endl;
os << "Selected Device: " << properties.name << std::endl;
os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
os << "SMs: " << properties.multiProcessorCount << std::endl;
os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
<< " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
// clang-format on
}
} // namespace sample
#endif // TRT_SAMPLE_DEVICE_H

File diff suppressed because it is too large


@@ -1,195 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_ENGINES_H
#define TRT_SAMPLE_ENGINES_H
#include <iostream>
#include <vector>
//#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
struct Parser {
// TrtUniquePtr<nvcaffeparser1::ICaffeParser> caffeParser;
TrtUniquePtr<nvonnxparser::IParser> onnxParser;
operator bool() const { return static_cast<bool>(onnxParser); }
};
struct BuildEnvironment {
TrtUniquePtr<INetworkDefinition> network;
//! Parser that creates the network. Must be declared *after* network, so
//! that when ~BuildEnvironment() executes, the parser is destroyed before
//! the network is destroyed.
Parser parser;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<uint8_t> engineBlob;
};
//!
//! \brief Generate a network definition for a given model
//!
//! \return Parser The parser used to initialize the network and that holds the
//! weights for the network, or an invalid
//! parser (the returned parser converts to false if tested)
//!
//! Constant input dimensions in the model must not be changed in the
//! corresponding network definition, because its correctness may rely on the
//! constants.
//!
//! \see Parser::operator bool()
//!
Parser modelToNetwork(const ModelOptions& model,
nvinfer1::INetworkDefinition& network, std::ostream& err);
//!
//! \brief Set up network and config
//!
//! \return boolean Return true if network and config were successfully set
//!
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
IBuilder& builder, INetworkDefinition& network,
IBuilderConfig& config, std::ostream& err,
std::vector<std::vector<char>>& sparseWeights);
//!
//! \brief Log refittable layers and weights of a refittable engine
//!
void dumpRefittable(nvinfer1::ICudaEngine& engine);
//!
//! \brief Load a serialized engine
//!
//! \return Pointer to the engine loaded or nullptr if the operation failed
//!
nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore,
std::ostream& err);
//!
//! \brief Save an engine into a file
//!
//! \return boolean Return true if the engine was successfully saved
//!
bool saveEngine(const nvinfer1::ICudaEngine& engine,
const std::string& fileName, std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return boolean Return true if the engine build environment was
//! successfully created
//!
bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, BuildEnvironment& env,
std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return Pointer to the engine created or nullptr if the creation failed
//!
inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys,
std::ostream& err) {
BuildEnvironment env;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
if (getEngineBuildEnv(model, build, sys, env, err)) {
engine.swap(env.engine);
}
return engine;
}
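//! Example usage of getEngine() (illustrative sketch; the option structs and
//! error stream are assumed to be provided by the caller):
//!   auto engine = getEngine(modelOpts, buildOpts, sysOpts, std::cerr);
//!   if (!engine) { /* creation failed; details were written to std::cerr */ }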
//!
//! \brief Create a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* networkToSerialized(const BuildOptions& build,
const SystemOptions& sys, IBuilder& builder,
INetworkDefinition& network,
std::ostream& err);
//!
//! \brief Transfer a model to a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* modelToSerialized(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
//!
//! \brief Serialize network and save it into a file
//!
//! \return boolean Return true if the network was successfully serialized and
//! saved
//!
bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine,
bool multiThreading);
//!
//! \brief Set tensor scales from a calibration table
//!
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
const std::vector<IOFormat>& inputFormats,
const std::vector<IOFormat>& outputFormats,
const std::string& calibrationFile);
//!
//! \brief Check if safe runtime is loaded.
//!
bool hasSafeRuntime();
//!
//! \brief Create a safe runtime object if the dynamic library is loaded.
//!
nvinfer1::safe::IRuntime*
createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
//!
//! \brief Check if consistency checker is loaded.
//!
bool hasConsistencyChecker();
//!
//! \brief Create a consistency checker object if the dynamic library is loaded.
//!
nvinfer1::consistency::IConsistencyChecker*
createConsistencyChecker(nvinfer1::ILogger& logger,
IHostMemory const* engine) noexcept;
//!
//! \brief Run consistency check on serialized engine.
//!
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
} // namespace sample
#endif // TRT_SAMPLE_ENGINES_H
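For reference, the helpers declared above wrap the standard TensorRT ONNX build flow. A minimal standalone sketch of that flow (hypothetical function name; buildSerializedNetwork is the TensorRT 8 builder API):
#include <memory>
#include <vector>
#include "NvInfer.h"
#include "NvOnnxParser.h"
std::vector<char> buildSerializedEngine(nvinfer1::ILogger& logger,
                                        const char* onnxPath) {
  using namespace nvinfer1;
  std::unique_ptr<IBuilder> builder{createInferBuilder(logger)};
  const auto flags = 1U << static_cast<uint32_t>(
      NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
  std::unique_ptr<INetworkDefinition> network{builder->createNetworkV2(flags)};
  std::unique_ptr<nvonnxparser::IParser> parser{
      nvonnxparser::createParser(*network, logger)};
  if (!parser->parseFromFile(
          onnxPath, static_cast<int32_t>(ILogger::Severity::kWARNING))) {
    return {};  // parse errors were reported through the logger
  }
  std::unique_ptr<IBuilderConfig> config{builder->createBuilderConfig()};
  std::unique_ptr<IHostMemory> blob{
      builder->buildSerializedNetwork(*network, *config)};
  if (!blob) {
    return {};
  }
  auto* data = static_cast<char*>(blob->data());
  return {data, data + blob->size()};  // engine blob, ready to save or load
}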


@@ -1,943 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <array>
#include <chrono>
#include <cuda_profiler_api.h>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <thread>
#include <utility>
#include <vector>
#if defined(__QNX__)
#include <sys/neutrino.h>
#include <sys/syspage.h>
#endif
#include "NvInfer.h"
#include "ErrorRecorder.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleEngines.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
#include "sampleUtils.h"
namespace sample {
template <class MapType, class EngineType>
bool validateTensorNames(const MapType& map, const EngineType* engine,
const int32_t endBindingIndex) {
// Check that the provided input tensor names match input tensors of the
// engine. Report an error and fail if a provided name cannot be found,
// because it implies a potential typo.
for (const auto& item : map) {
bool tensorNameFound{false};
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (engine->bindingIsInput(b) &&
engine->getBindingName(b) == item.first) {
tensorNameFound = true;
break;
}
}
if (!tensorNameFound) {
sample::gLogError
<< "Cannot find input tensor with name \"" << item.first
<< "\" in the engine bindings! "
<< "Please make sure the input tensor names are correct."
<< std::endl;
return false;
}
}
return true;
}
template <class EngineType, class ContextType> class FillBindingClosure {
private:
using InputsMap = std::unordered_map<std::string, std::string>;
using BindingsVector = std::vector<std::unique_ptr<Bindings>>;
EngineType const* engine;
ContextType const* context;
InputsMap const& inputs;
BindingsVector& bindings;
int32_t batch;
int32_t endBindingIndex;
void fillOneBinding(int32_t bindingIndex, int64_t vol) {
auto const dims = getDims(bindingIndex);
auto const name = engine->getBindingName(bindingIndex);
auto const isInput = engine->bindingIsInput(bindingIndex);
auto const dataType = engine->getBindingDataType(bindingIndex);
auto const* bindingInOutStr = isInput ? "input" : "output";
for (auto& binding : bindings) {
const auto input = inputs.find(name);
if (isInput && input != inputs.end()) {
sample::gLogInfo << "Using values loaded from " << input->second
<< " for input " << name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType,
input->second);
} else {
sample::gLogInfo << "Using random values for " << bindingInOutStr << " "
<< name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType);
}
sample::gLogInfo << "Created " << bindingInOutStr << " binding for "
<< name << " with dimensions " << dims << std::endl;
}
}
bool fillAllBindings(int32_t batch, int32_t endBindingIndex) {
if (!validateTensorNames(inputs, engine, endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --loadInputs flag."
<< std::endl;
return false;
}
for (int32_t b = 0; b < endBindingIndex; b++) {
auto const dims = getDims(b);
auto const comps = engine->getBindingComponentsPerElement(b);
auto const strides = context->getStrides(b);
int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b);
auto const vol = volume(dims, strides, vectorDimIndex, comps, batch);
fillOneBinding(b, vol);
}
return true;
}
Dims getDims(int32_t bindingIndex);
public:
FillBindingClosure(EngineType const* _engine, ContextType const* _context,
InputsMap const& _inputs, BindingsVector& _bindings,
int32_t _batch, int32_t _endBindingIndex)
: engine(_engine), context(_context), inputs(_inputs),
bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {}
bool operator()() { return fillAllBindings(batch, endBindingIndex); }
};
template <>
Dims FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>::
getDims(int32_t bindingIndex) {
return context->getBindingDimensions(bindingIndex);
}
template <>
Dims FillBindingClosure<
nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) {
return engine->getBindingDimensions(bindingIndex);
}
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference) {
int32_t device{};
cudaCheck(cudaGetDevice(&device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// Use managed memory on integrated devices when transfers are skipped
// and when it is explicitly requested on the commandline.
bool useManagedMemory{(inference.skipTransfers && properties.integrated) ||
inference.useManaged};
using FillSafeBindings =
FillBindingClosure<nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
auto* safeEngine = iEnv.safeEngine.get();
for (int32_t s = 0; s < inference.streams; ++s) {
iEnv.safeContext.emplace_back(safeEngine->createExecutionContext());
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
const int32_t nBindings = safeEngine->getNbBindings();
auto const* safeContext = iEnv.safeContext.front().get();
// batch is set to 1 because the safe runtime only supports explicit batch.
return FillSafeBindings(iEnv.safeEngine.get(), safeContext,
inference.inputs, iEnv.bindings, 1, nBindings)();
}
using FillStdBindings =
FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>;
for (int32_t s = 0; s < inference.streams; ++s) {
auto ec = iEnv.engine->createExecutionContext();
if (ec == nullptr) {
sample::gLogError << "Unable to create execution context for stream " << s
<< "." << std::endl;
return false;
}
iEnv.context.emplace_back(ec);
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
if (iEnv.profiler) {
iEnv.context.front()->setProfiler(iEnv.profiler.get());
// Always run reportToProfiler() after enqueue launch
iEnv.context.front()->setEnqueueEmitsProfile(false);
}
const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles();
const int32_t nBindings = iEnv.engine->getNbBindings();
const int32_t bindingsInProfile =
nOptProfiles > 0 ? nBindings / nOptProfiles : 0;
const int32_t endBindingIndex =
bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings();
if (nOptProfiles > 1) {
sample::gLogWarning << "Multiple profiles are currently not supported. "
"Running with one profile."
<< std::endl;
}
// Make sure that the tensor names provided in command-line args actually
// exist in the engine bindings to avoid silent typos.
if (!validateTensorNames(inference.shapes, iEnv.engine.get(),
endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --shapes flag."
<< std::endl;
return false;
}
// Set all input dimensions before all bindings can be allocated
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (iEnv.engine->bindingIsInput(b)) {
auto dims = iEnv.context.front()->getBindingDimensions(b);
const bool isScalar = dims.nbDims == 0;
const bool isDynamicInput =
std::any_of(dims.d, dims.d + dims.nbDims,
[](int32_t dim) { return dim == -1; }) ||
iEnv.engine->isShapeBinding(b);
if (isDynamicInput) {
auto shape = inference.shapes.find(iEnv.engine->getBindingName(b));
std::vector<int32_t> staticDims;
if (shape == inference.shapes.end()) {
// If no shape is provided, set dynamic dimensions to 1.
constexpr int32_t DEFAULT_DIMENSION = 1;
if (iEnv.engine->isShapeBinding(b)) {
if (isScalar) {
staticDims.push_back(1);
} else {
staticDims.resize(dims.d[0]);
std::fill(staticDims.begin(), staticDims.end(),
DEFAULT_DIMENSION);
}
} else {
staticDims.resize(dims.nbDims);
std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
[&](int32_t dimension) {
return dimension >= 0 ? dimension
: DEFAULT_DIMENSION;
});
}
sample::gLogWarning << "Dynamic dimensions required for input: "
<< iEnv.engine->getBindingName(b)
<< ", but no shapes were provided. Automatically "
"overriding shape to: "
<< staticDims << std::endl;
} else if (inference.inputs.count(shape->first) &&
iEnv.engine->isShapeBinding(b)) {
if (isScalar || dims.nbDims == 1) {
// Load shape tensor from file.
size_t const size = isScalar ? 1 : dims.d[0];
staticDims.resize(size);
auto const& filename = inference.inputs.at(shape->first);
auto dst = reinterpret_cast<char*>(staticDims.data());
loadFromFile(filename, dst,
size * sizeof(decltype(staticDims)::value_type));
} else {
sample::gLogWarning << "Cannot load shape tensor " << shape->first
<< " from file, "
<< "ND-Shape isn't supported yet" << std::endl;
// Fallback
staticDims = shape->second;
}
} else {
staticDims = shape->second;
}
for (auto& c : iEnv.context) {
if (iEnv.engine->isShapeBinding(b)) {
if (!c->setInputShapeBinding(b, staticDims.data())) {
return false;
}
} else {
if (!c->setBindingDimensions(b, toDims(staticDims))) {
return false;
}
}
}
}
}
}
auto* engine = iEnv.engine.get();
auto const* context = iEnv.context.front().get();
int32_t const batch =
engine->hasImplicitBatchDimension() ? inference.batch : 1;
return FillStdBindings(engine, context, inference.inputs, iEnv.bindings,
batch, endBindingIndex)();
}
namespace {
#if defined(__QNX__)
using TimePoint = double;
#else
using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
#endif
TimePoint getCurrentTime() {
#if defined(__QNX__)
uint64_t const currentCycles = ClockCycles();
uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
// Return current timestamp in ms.
return static_cast<TimePoint>(currentCycles) * 1000. / cyclesPerSecond;
#else
return std::chrono::high_resolution_clock::now();
#endif
}
//!
//! \struct SyncStruct
//! \brief Threads synchronization structure
//!
struct SyncStruct {
std::mutex mutex;
TrtCudaStream mainStream;
TrtCudaEvent gpuStart{cudaEventBlockingSync};
TimePoint cpuStart{};
float sleep{};
};
struct Enqueue {
explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
: mContext(context), mBuffers(buffers) {}
nvinfer1::IExecutionContext& mContext;
void** mBuffers{};
};
//!
//! \class EnqueueImplicit
//! \brief Functor to enqueue inference with implicit batch
//!
class EnqueueImplicit : private Enqueue {
public:
explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
int32_t batch)
: Enqueue(context, buffers), mBatch(batch) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueue()"
<< std::endl;
}
return true;
}
return false;
}
private:
int32_t mBatch;
};
//!
//! \class EnqueueExplicit
//! \brief Functor to enqueue inference with explicit batch
//!
class EnqueueExplicit : private Enqueue {
public:
explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
: Enqueue(context, buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueueV2()"
<< std::endl;
}
return true;
}
return false;
}
};
//!
//! \class EnqueueGraph
//! \brief Functor to enqueue inference from CUDA Graph
//!
class EnqueueGraph {
public:
explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
TrtCudaGraph& graph)
: mGraph(graph), mContext(context) {}
bool operator()(TrtCudaStream& stream) const {
if (mGraph.launch(stream)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.reportToProfiler()) {
gLogWarning << "Failed to collect layer timing info from previous CUDA "
"graph launch"
<< std::endl;
}
return true;
}
return false;
}
TrtCudaGraph& mGraph;
nvinfer1::IExecutionContext& mContext;
};
//!
//! \class EnqueueSafe
//! \brief Functor to enqueue safe execution context
//!
class EnqueueSafe {
public:
explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
void** buffers)
: mContext(context), mBuffers(buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
return true;
}
return false;
}
nvinfer1::safe::IExecutionContext& mContext;
void** mBuffers{};
};
using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
enum class StreamType : int32_t {
kINPUT = 0,
kCOMPUTE = 1,
kOUTPUT = 2,
kNUM = 3
};
enum class EventType : int32_t {
kINPUT_S = 0,
kINPUT_E = 1,
kCOMPUTE_S = 2,
kCOMPUTE_E = 3,
kOUTPUT_S = 4,
kOUTPUT_E = 5,
kNUM = 6
};
using MultiStream =
std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
static_cast<int32_t>(EventType::kNUM)>;
using EnqueueTimes = std::array<TimePoint, 2>;
//!
//! \class Iteration
//! \brief Inference iteration and streams management
//!
template <class ContextType> class Iteration {
public:
Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
Bindings& bindings)
: mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
mContext(&context) {
for (int32_t d = 0; d < mDepth; ++d) {
for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
}
}
createEnqueueFunction(inference, context, bindings);
}
bool query(bool skipTransfers) {
if (mActive[mNext]) {
return true;
}
if (!skipTransfers) {
record(EventType::kINPUT_S, StreamType::kINPUT);
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
record(EventType::kINPUT_E, StreamType::kINPUT);
wait(EventType::kINPUT_E,
StreamType::kCOMPUTE); // Wait for input DMA before compute
}
record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
recordEnqueueTime();
if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
return false;
}
recordEnqueueTime();
record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
if (!skipTransfers) {
wait(EventType::kCOMPUTE_E,
StreamType::kOUTPUT); // Wait for compute before output DMA
record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
}
mActive[mNext] = true;
moveNext();
return true;
}
float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
if (mActive[mNext]) {
if (skipTransfers) {
getEvent(EventType::kCOMPUTE_E).synchronize();
} else {
getEvent(EventType::kOUTPUT_E).synchronize();
}
trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
mActive[mNext] = false;
return getEvent(EventType::kCOMPUTE_S) - gpuStart;
}
return 0;
}
void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
for (int32_t d = 0; d < mDepth; ++d) {
sync(cpuStart, gpuStart, trace, skipTransfers);
moveNext();
}
}
void wait(TrtCudaEvent& gpuStart) {
getStream(StreamType::kINPUT).wait(gpuStart);
}
void setInputData() {
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
}
void fetchOutputData() {
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
}
private:
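// With the default depth of 2, mNext ping-pongs between the two in-flight
// iterations (mDepth - 1 - mNext toggles 0 and 1); with depth 1 it stays 0.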
void moveNext() { mNext = mDepth - 1 - mNext; }
TrtCudaStream& getStream(StreamType t) {
return mStream[static_cast<int32_t>(t)];
}
TrtCudaEvent& getEvent(EventType t) {
return *mEvents[mNext][static_cast<int32_t>(t)];
}
void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }
void recordEnqueueTime() {
mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
enqueueStart = 1 - enqueueStart;
}
TimePoint getEnqueueTime(bool start) {
return mEnqueueTimes[mNext][start ? 0 : 1];
}
void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }
InferenceTrace getTrace(const TimePoint& cpuStart,
const TrtCudaEvent& gpuStart, bool skipTransfers) {
float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_S) - gpuStart;
float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_E) - gpuStart;
float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_S) - gpuStart;
float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_E) - gpuStart;
return InferenceTrace(mStreamId,
std::chrono::duration<float, std::milli>(
getEnqueueTime(true) - cpuStart)
.count(),
std::chrono::duration<float, std::milli>(
getEnqueueTime(false) - cpuStart)
.count(),
is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
}
void createEnqueueFunction(const InferenceOptions& inference,
nvinfer1::IExecutionContext& context,
Bindings& bindings) {
if (inference.batch) {
mEnqueue = EnqueueFunction(EnqueueImplicit(
context, mBindings.getDeviceBuffers(), inference.batch));
} else {
mEnqueue = EnqueueFunction(
EnqueueExplicit(context, mBindings.getDeviceBuffers()));
}
if (inference.graph) {
TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
// Avoid capturing initialization calls by executing the enqueue function
// at least once before starting CUDA graph capture.
const auto ret = mEnqueue(stream);
assert(ret);
stream.synchronize();
mGraph.beginCapture(stream);
// The built TRT engine may contain operations that are not permitted
// under CUDA graph capture mode.
// When the stream is capturing, the enqueue call may return false if the
// current CUDA graph capture fails.
if (mEnqueue(stream)) {
mGraph.endCapture(stream);
mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
} else {
mGraph.endCaptureOnError(stream);
// Ensure any CUDA error has been cleaned up.
cudaCheck(cudaGetLastError());
sample::gLogWarning << "The built TensorRT engine contains operations "
"that are not permitted under "
"CUDA graph capture mode."
<< std::endl;
sample::gLogWarning << "The specified --useCudaGraph flag has been "
"ignored. The inference will be "
"launched without using CUDA graph launch."
<< std::endl;
}
}
}
void createEnqueueFunction(const InferenceOptions&,
nvinfer1::safe::IExecutionContext& context,
Bindings&) {
mEnqueue =
EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
}
Bindings& mBindings;
TrtCudaGraph mGraph;
EnqueueFunction mEnqueue;
int32_t mStreamId{0};
int32_t mNext{0};
int32_t mDepth{2}; // default to double buffer to hide DMA transfers
std::vector<bool> mActive;
MultiStream mStream;
std::vector<MultiEvent> mEvents;
int32_t enqueueStart{0};
std::vector<EnqueueTimes> mEnqueueTimes;
ContextType* mContext{nullptr};
};
template <class ContextType>
bool inferenceLoop(
std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
bool skipTransfers, float idleMs) {
float durationMs = 0;
int32_t skip = 0;
for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
++i) {
for (auto& s : iStreams) {
if (!s->query(skipTransfers)) {
return false;
}
}
for (auto& s : iStreams) {
durationMs = std::max(durationMs,
s->sync(cpuStart, gpuStart, trace, skipTransfers));
}
if (durationMs < warmupMs) // Warming up
{
if (durationMs) // Don't count iterations that completed during warmup
{
++skip;
}
continue;
}
if (idleMs != 0.F) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(idleMs));
}
}
for (auto& s : iStreams) {
s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
}
return true;
}
template <class ContextType>
void inferenceExecution(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
const int32_t threadIdx, const int32_t streamsPerThread,
int32_t device, std::vector<InferenceTrace>& trace) {
float warmupMs = inference.warmup;
float durationMs = inference.duration * 1000.F + warmupMs;
cudaCheck(cudaSetDevice(device));
std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;
for (int32_t s = 0; s < streamsPerThread; ++s) {
const int32_t streamId{threadIdx * streamsPerThread + s};
auto* iteration = new Iteration<ContextType>(
streamId, inference, *iEnv.template getContext<ContextType>(streamId),
*iEnv.bindings[streamId]);
if (inference.skipTransfers) {
iteration->setInputData();
}
iStreams.emplace_back(iteration);
}
for (auto& s : iStreams) {
s->wait(sync.gpuStart);
}
std::vector<InferenceTrace> localTrace;
if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
inference.iterations, durationMs, warmupMs, localTrace,
inference.skipTransfers, inference.idle)) {
iEnv.error = true;
}
if (inference.skipTransfers) {
for (auto& s : iStreams) {
s->fetchOutputData();
}
}
sync.mutex.lock();
trace.insert(trace.end(), localTrace.begin(), localTrace.end());
sync.mutex.unlock();
}
inline std::thread makeThread(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
int32_t threadIdx, int32_t streamsPerThread,
int32_t device,
std::vector<InferenceTrace>& trace) {
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
} // namespace
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace) {
cudaCheck(cudaProfilerStart());
trace.resize(0);
SyncStruct sync;
sync.sleep = inference.sleep;
sync.mainStream.sleep(&sync.sleep);
sync.cpuStart = getCurrentTime();
sync.gpuStart.record(sync.mainStream);
// When multiple streams are used, trtexec can run inference in two modes:
// (1) if inference.threads is true, then run each stream on each thread.
// (2) if inference.threads is false, then run all streams on the same thread.
const int32_t numThreads = inference.threads ? inference.streams : 1;
const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;
std::vector<std::thread> threads;
for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
streamsPerThread, device, trace));
}
for (auto& th : threads) {
th.join();
}
cudaCheck(cudaProfilerStop());
auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
return a.h2dStart < b.h2dStart;
};
std::sort(trace.begin(), trace.end(), cmpTrace);
return !iEnv.error;
}
namespace {
size_t reportGpuMemory() {
static size_t prevFree{0};
size_t free{0};
size_t total{0};
size_t newlyAllocated{0};
cudaCheck(cudaMemGetInfo(&free, &total));
sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
if (prevFree != 0) {
newlyAllocated = (prevFree - free);
sample::gLogInfo << ", newly allocated GPU memory = "
<< newlyAllocated / 1024.0_MiB << " GiB";
}
sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
<< std::endl;
prevFree = free;
return newlyAllocated;
}
} // namespace
//! Returns true if deserialization is slower than expected or fails.
bool timeDeserialize(InferenceEnvironment& iEnv) {
constexpr int32_t kNB_ITERS{20};
std::unique_ptr<IRuntime> rt{
createInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<ICudaEngine> engine;
std::unique_ptr<safe::IRuntime> safeRT{
sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<safe::ICudaEngine> safeEngine;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
safeRT->setErrorRecorder(&gRecorder);
}
auto timeDeserializeFn = [&]() -> float {
bool deserializeOK{false};
engine.reset(nullptr);
safeEngine.reset(nullptr);
auto startClock = std::chrono::high_resolution_clock::now();
if (iEnv.safe) {
safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size()));
deserializeOK = (safeEngine != nullptr);
} else {
engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size(), nullptr));
deserializeOK = (engine != nullptr);
}
auto endClock = std::chrono::high_resolution_clock::now();
// return NAN if deserialization failed.
return deserializeOK
? std::chrono::duration<float, std::milli>(endClock - startClock)
.count()
: NAN;
};
// Warm up the caches to make sure that cache thrashing isn't throwing off
// the results.
{
sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
for (int32_t i = 0, e = 2; i < e; ++i) {
timeDeserializeFn();
}
}
sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
float const first = timeDeserializeFn();
// Check if the first deserialization succeeded.
if (std::isnan(first)) {
sample::gLogError << "Engine deserialization failed." << std::endl;
return true;
}
sample::gLogInfo << "First deserialization time = " << first
<< " milliseconds" << std::endl;
// Record initial gpu memory state.
reportGpuMemory();
float totalTime{0.F};
for (int32_t i = 0; i < kNB_ITERS; ++i) {
totalTime += timeDeserializeFn();
}
const auto averageTime = totalTime / kNB_ITERS;
// reportGpuMemory sometimes reports zero after a single deserialization of a
// small engine, so use the memory newly allocated across all the iterations.
const auto totalEngineSizeGpu = reportGpuMemory();
sample::gLogInfo << "Total deserialization time = " << totalTime
<< " milliseconds in " << kNB_ITERS
<< " iterations, average time = " << averageTime
<< " milliseconds, first time = " << first
<< " milliseconds." << std::endl;
sample::gLogInfo << "Deserialization Bandwidth = "
<< 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
<< std::endl;
// If the first deserialization is more than tolerance slower than
// the average deserialization, return true, which means an error occurred.
// The tolerance is set to 2x since the deserialization time is quick and
// susceptible to caching issues causing problems in the first timing.
const auto tolerance = 2.0F;
const bool isSlowerThanExpected = first > averageTime * tolerance;
if (isSlowerThanExpected) {
sample::gLogInfo << "First deserialization time divided by average time is "
<< (first / averageTime) << ". Exceeds tolerance of "
<< tolerance << "x." << std::endl;
}
return isSlowerThanExpected;
}
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format) {
auto runtime = std::unique_ptr<IRuntime>(
createInferRuntime(sample::gLogger.getTRTLogger()));
auto inspector =
std::unique_ptr<IEngineInspector>(iEnv.engine->createEngineInspector());
if (!iEnv.context.empty()) {
inspector->setExecutionContext(iEnv.context.front().get());
}
std::string result = inspector->getEngineInformation(format);
return result;
}
} // namespace sample
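The CUDA-graph path in createEnqueueFunction above follows the usual warm-up/capture/instantiate/launch pattern. A minimal sketch of that pattern against the raw CUDA runtime (illustrative; `enqueue` stands in for any sequence of async launches on the stream):
#include <cuda_runtime_api.h>
bool captureAndLaunch(cudaStream_t stream, void (*enqueue)(cudaStream_t)) {
  enqueue(stream);                 // warm-up run keeps init work out of capture
  cudaStreamSynchronize(stream);
  cudaGraph_t graph{};
  if (cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal) !=
      cudaSuccess) {
    return false;
  }
  enqueue(stream);                 // recorded into the graph, not executed
  if (cudaStreamEndCapture(stream, &graph) != cudaSuccess) {
    cudaGetLastError();            // clear any sticky capture error
    return false;
  }
  cudaGraphExec_t graphExec{};
  if (cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0) !=
      cudaSuccess) {
    cudaGraphDestroy(graph);
    return false;
  }
  cudaGraphLaunch(graphExec, stream);  // replays the whole recorded sequence
  cudaStreamSynchronize(stream);
  cudaGraphExecDestroy(graphExec);
  cudaGraphDestroy(graph);
  return true;
}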


@@ -1,88 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_INFERENCE_H
#define TRT_SAMPLE_INFERENCE_H
#include "sampleReporting.h"
#include "sampleUtils.h"
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "NvInferSafeRuntime.h"
namespace sample {
struct InferenceEnvironment {
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<Profiler> profiler;
std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
std::vector<std::unique_ptr<Bindings>> bindings;
bool error{false};
std::vector<uint8_t> engineBlob;
bool safe{false};
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;
template <class ContextType>
inline ContextType* getContext(int32_t streamIdx);
};
template <>
inline nvinfer1::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return context[streamIdx].get();
}
template <>
inline nvinfer1::safe::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return safeContext[streamIdx].get();
}
//!
//! \brief Set up contexts and bindings for inference
//!
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference);
//!
//! \brief Deserialize the engine and time how long it takes.
//!
bool timeDeserialize(InferenceEnvironment& iEnv);
//!
//! \brief Run inference and collect timing, return false if any error hit
//! during inference
//!
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace);
//!
//! \brief Get layer information of the engine.
//!
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format);
} // namespace sample
#endif // TRT_SAMPLE_INFERENCE_H
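A typical call sequence for the interfaces above (a sketch assuming the options and engine come from sampleOptions/sampleEngines, which must be included):
sample::InferenceEnvironment iEnv;
iEnv.engine = sample::getEngine(modelOpts, buildOpts, sysOpts, std::cerr);
sample::InferenceOptions inferOpts;  // defaults: 1 stream, 10 iterations, ...
std::vector<sample::InferenceTrace> trace;
if (iEnv.engine && sample::setUpInference(iEnv, inferOpts) &&
    sample::runInference(inferOpts, iEnv, /*device=*/0, trace)) {
  // trace now holds per-query timestamps, sorted by H2D start time.
}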

File diff suppressed because it is too large.


@@ -1,311 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_OPTIONS_H
#define TRT_SAMPLE_OPTIONS_H
#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "NvInfer.h"
namespace sample {
// Build default params
constexpr int32_t maxBatchNotProvided{0};
constexpr int32_t defaultMinTiming{1};
constexpr int32_t defaultAvgTiming{8};
// System default params
constexpr int32_t defaultDevice{0};
// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};
// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr float defaultPercentile{99};
enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };
enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };
using Arguments = std::unordered_multimap<std::string, std::string>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
using ShapeRange =
std::array<std::vector<int32_t>,
nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes =
std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
struct Options {
virtual void parse(Arguments& arguments) = 0;
};
struct BaseModelOptions : public Options {
ModelFormat format{ModelFormat::kANY};
std::string model;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct UffInput : public Options {
std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
bool NHWC{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ModelOptions : public Options {
BaseModelOptions baseModel;
std::string prototxt;
std::vector<std::string> outputs;
UffInput uffInputs;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct BuildOptions : public Options {
int32_t maxBatch{maxBatchNotProvided};
double workspace{-1.0};
double dlaSRAM{-1.0};
double dlaLocalDRAM{-1.0};
double dlaGlobalDRAM{-1.0};
int32_t minTiming{defaultMinTiming};
int32_t avgTiming{defaultAvgTiming};
bool tf32{true};
bool fp16{false};
bool int8{false};
bool directIO{false};
PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
LayerPrecisions layerPrecisions;
LayerOutputTypes layerOutputTypes;
bool safe{false};
bool consistency{false};
bool restricted{false};
bool save{false};
bool load{false};
bool refittable{false};
SparsityFlag sparsity{SparsityFlag::kDISABLE};
nvinfer1::ProfilingVerbosity profilingVerbosity{
nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
std::string engine;
std::string calibration;
std::unordered_map<std::string, ShapeRange> shapes;
std::unordered_map<std::string, ShapeRange> shapesCalib;
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
nvinfer1::TacticSources enabledTactics{0};
nvinfer1::TacticSources disabledTactics{0};
TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
std::string timingCacheFile{};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SystemOptions : public Options {
int32_t device{defaultDevice};
int32_t DLACore{-1};
bool fallback{false};
std::vector<std::string> plugins;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct InferenceOptions : public Options {
int32_t batch{batchNotProvided};
int32_t iterations{defaultIterations};
int32_t streams{defaultStreams};
float warmup{defaultWarmUp};
float duration{defaultDuration};
float sleep{defaultSleep};
float idle{defaultIdle};
bool overlap{true};
bool skipTransfers{false};
bool useManaged{false};
bool spin{false};
bool threads{false};
bool graph{false};
bool skip{false};
bool rerun{false};
bool timeDeserialize{false};
bool timeRefit{false};
std::unordered_map<std::string, std::string> inputs;
std::unordered_map<std::string, std::vector<int32_t>> shapes;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ReportingOptions : public Options {
bool verbose{false};
int32_t avgs{defaultAvgRuns};
float percentile{defaultPercentile};
bool refit{false};
bool output{false};
bool profile{false};
bool layerInfo{false};
std::string exportTimes;
std::string exportOutput;
std::string exportProfile;
std::string exportLayerInfo;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SafeBuilderOptions : public Options {
std::string serialized{};
std::string onnxModelFile{};
bool help{false};
bool verbose{false};
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
bool int8{false};
std::string calibFile{};
std::vector<std::string> plugins;
bool consistency{false};
bool standard{false};
void parse(Arguments& arguments) override;
static void printHelp(std::ostream& out);
};
struct AllOptions : public Options {
ModelOptions model;
BuildOptions build;
SystemOptions system;
InferenceOptions inference;
ReportingOptions reporting;
bool helps{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
bool parseHelp(Arguments& arguments);
void helpHelp(std::ostream& out);
// Functions to print options
std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
std::ostream& operator<<(std::ostream& os, const UffInput& input);
std::ostream& operator<<(std::ostream& os, const IOFormat& format);
std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
std::ostream& operator<<(std::ostream& os, const AllOptions& options);
std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const nvinfer1::WeightsRole role) {
switch (role) {
case nvinfer1::WeightsRole::kKERNEL: {
os << "Kernel";
break;
}
case nvinfer1::WeightsRole::kBIAS: {
os << "Bias";
break;
}
case nvinfer1::WeightsRole::kSHIFT: {
os << "Shift";
break;
}
case nvinfer1::WeightsRole::kSCALE: {
os << "Scale";
break;
}
case nvinfer1::WeightsRole::kCONSTANT: {
os << "Constant";
break;
}
case nvinfer1::WeightsRole::kANY: {
os << "Any";
break;
}
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const std::vector<int32_t>& vec) {
for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i) {
os << (i ? "x" : "") << vec[i];
}
return os;
}
} // namespace sample
#endif // TRT_SAMPLE_OPTIONS_H
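A minimal driver sketch for the parsing entry points above (illustrative; mirrors how trtexec-style tools consume these structs):
#include <iostream>
#include "sampleOptions.h"
int main(int argc, char* argv[]) {
  sample::Arguments args = sample::argsToArgumentsMap(argc, argv);
  if (sample::parseHelp(args)) {
    sample::AllOptions::help(std::cout);
    return 0;
  }
  sample::AllOptions options;
  options.parse(args);  // consumes recognized flags from the multimap
  std::cout << options << std::endl;  // echo the parsed configuration
  return 0;
}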


@@ -1,480 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <utility>
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
using namespace nvinfer1;
namespace sample {
namespace {
//!
//! \brief Find percentile in an ascending sequence of timings
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
//!
template <typename T>
float findPercentile(float percentile,
std::vector<InferenceTime> const& timings,
T const& toFloat) {
int32_t const all = static_cast<int32_t>(timings.size());
int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
if (percentile < 0.0f || percentile > 100.0f) {
throw std::runtime_error("percentile is not in [0, 100]!");
}
return toFloat(timings[std::max(all - 1 - exclude, 0)]);
}
//!
//! \brief Find median in a sorted sequence of timings
//!
template <typename T>
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat) {
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
int32_t const m = timings.size() / 2;
if (timings.size() % 2) {
return toFloat(timings[m]);
}
return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
}
//!
//! \brief Find the coefficient of variation (std / mean, as a percentage) in
//! a sorted sequence of timings given the mean
//!
template <typename T>
float findCoeffOfVariance(std::vector<InferenceTime> const& timings,
T const& toFloat, float mean) {
if (timings.empty()) {
return 0;
}
if (mean == 0.F) {
return std::numeric_limits<float>::infinity();
}
auto const metricAccumulator = [toFloat, mean](float acc,
InferenceTime const& a) {
float const diff = toFloat(a) - mean;
return acc + diff * diff;
};
float const variance =
std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) /
timings.size();
return std::sqrt(variance) / mean * 100.F;
}
inline InferenceTime traceToTiming(const InferenceTrace& a) {
return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart),
(a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart),
(a.d2hEnd - a.h2dStart));
}
} // namespace
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float benchTimeMs, std::ostream& os) {
os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms"
<< std::endl;
os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000
<< " s" << std::endl;
}
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os) {
int32_t count = 0;
InferenceTime sum;
os << std::endl;
os << "=== Trace details ===" << std::endl;
os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
for (auto const& t : timings) {
sum += t;
if (++count == runsPerAvg) {
// clang-format off
os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
<< " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg
<< " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl;
// clang-format on
count = 0;
sum.enq = 0;
sum.h2d = 0;
sum.compute = 0;
sum.d2h = 0;
sum.e2e = 0;
}
}
}
void printMetricExplanations(std::ostream& os) {
os << std::endl;
os << "=== Explanations of the performance metrics ===" << std::endl;
os << "Total Host Walltime: the host walltime from when the first query "
"(after warmups) is enqueued to when the "
"last query is completed."
<< std::endl;
os << "GPU Compute Time: the GPU latency to execute the kernels for a query."
<< std::endl;
os << "Total GPU Compute Time: the summation of the GPU Compute Time of all "
"the queries. If this is significantly "
"shorter than Total Host Walltime, the GPU may be under-utilized "
"because of host-side overheads or data "
"transfers."
<< std::endl;
os << "Throughput: the observed throughput computed by dividing the number "
"of queries by the Total Host Walltime. "
"If this is significantly lower than the reciprocal of GPU Compute "
"Time, the GPU may be under-utilized "
"because of host-side overheads or data transfers."
<< std::endl;
os << "Enqueue Time: the host latency to enqueue a query. If this is longer "
"than GPU Compute Time, the GPU may be "
"under-utilized."
<< std::endl;
os << "H2D Latency: the latency for host-to-device data transfers for input "
"tensors of a single query."
<< std::endl;
os << "D2H Latency: the latency for device-to-host data transfers for output "
"tensors of a single query."
<< std::endl;
os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H "
"Latency. This is the latency to infer a "
"single query."
<< std::endl;
os << "End-to-End Host Latency: the duration from when the H2D of a query is "
"called to when the D2H of the same "
"query is completed, which includes the latency to wait for the "
"completion of the previous query. This is "
"the latency of a query if multiple queries are enqueued consecutively."
<< std::endl;
}
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile) {
auto const metricComparator = [metricGetter](InferenceTime const& a,
InferenceTime const& b) {
return metricGetter(a) < metricGetter(b);
};
auto const metricAccumulator = [metricGetter](float acc,
InferenceTime const& a) {
return acc + metricGetter(a);
};
std::vector<InferenceTime> newTimings = timings;
std::sort(newTimings.begin(), newTimings.end(), metricComparator);
PerformanceResult result;
result.min = metricGetter(newTimings.front());
result.max = metricGetter(newTimings.back());
result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f,
metricAccumulator) /
newTimings.size();
result.median = findMedian(newTimings, metricGetter);
result.percentile = findPercentile(percentile, newTimings, metricGetter);
result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
return result;
}
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
float percentile, int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
float const throughput = batchSize * timings.size() / walltimeMs * 1000;
auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
auto const latencyResult =
getPerformanceResult(timings, getLatency, percentile);
auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; };
auto const e2eLatencyResult =
getPerformanceResult(timings, getEndToEnd, percentile);
auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
auto const enqueueResult =
getPerformanceResult(timings, getEnqueue, percentile);
auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
auto const h2dResult = getPerformanceResult(timings, getH2d, percentile);
auto const getCompute = [](InferenceTime const& t) { return t.compute; };
auto const gpuComputeResult =
getPerformanceResult(timings, getCompute, percentile);
auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
auto const d2hResult = getPerformanceResult(timings, getD2h, percentile);
auto const toPerfString = [percentile](const PerformanceResult& r) {
std::stringstream s;
s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean
<< " ms, "
<< "median = " << r.median << " ms, percentile(" << percentile
<< "%) = " << r.percentile << " ms";
return s.str();
};
osInfo << std::endl;
osInfo << "=== Performance summary ===" << std::endl;
osInfo << "Throughput: " << throughput << " qps" << std::endl;
osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult)
<< std::endl;
osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
osInfo << "Total GPU Compute Time: "
<< gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
// Report warnings if the throughput is bound by other factors than GPU
// Compute Time.
constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
if (enqueueResult.median >
kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) {
osWarning << "* Throughput may be bound by Enqueue Time rather than GPU "
"Compute and the GPU may be under-utilized."
<< std::endl;
osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs "
"where possible) may increase the "
"throughput."
<< std::endl;
}
if (h2dResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by host-to-device transfers for "
"the inputs rather than GPU Compute and "
"the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
if (d2hResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by device-to-host transfers for "
"the outputs rather than GPU Compute "
"and the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
// Report warnings if the GPU Compute Time is unstable.
constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) {
osWarning
<< "* GPU compute time is unstable, with coefficient of variance = "
<< gpuComputeResult.coeffVar << "%." << std::endl;
osWarning << " If not already in use, locking GPU clock frequency or "
"adding --useSpinWait may improve the "
<< "stability." << std::endl;
}
// Explain what the metrics mean.
osInfo << "Explanations of the performance metrics are printed in the "
"verbose logs."
<< std::endl;
printMetricExplanations(osVerbose);
osInfo << std::endl;
}
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
const ReportingOptions& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) {
return a.computeStart >= warmupMs;
};
auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
int32_t const warmups = noWarmup - trace.begin();
float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
// When implicit batch is used, batchSize = options.inference.batch, which is
// parsed from --batch.
// When explicit batch is used, batchSize = options.inference.batch = 0, so
// treat inference with explicit batch as a single query and report the
// throughput accordingly.
batchSize = batchSize ? batchSize : 1;
printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize,
warmupMs, benchTime, osInfo);
std::vector<InferenceTime> timings(trace.size() - warmups);
std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
printTiming(timings, reporting.avgs, osInfo);
printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo,
osWarning, osVerbose);
if (!reporting.exportTimes.empty()) {
exportJSONTrace(trace, reporting.exportTimes);
}
}
//! Printed format:
//! [ value, ...]
//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end
//! h2d" : time, "start compute" : time,
//! "end compute" : time, "start d2h" : time, "end d2h" : time,
//! "h2d" : time, "compute" : time,
//! "d2h" : time, "latency" : time, "end to end" : time }
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName) {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl;
char const* sep = " ";
for (auto const& t : trace) {
InferenceTime const it(traceToTiming(t));
os << sep << "{ ";
sep = ", ";
// clang-format off
os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
<< "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
<< "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
<< "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
<< "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
<< "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep
<< "\"endToEndMs\" : " << it.e2e << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept {
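// mIterator walks mLayers in lockstep with TensorRT's per-layer callbacks.
// Reaching end() means either the very first pass (grow the list) or the
// start of a new pass, detected when the first layer's name repeats.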
if (mIterator == mLayers.end()) {
bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
mUpdatesCount += mLayers.empty() || first;
if (first) {
mIterator = mLayers.begin();
} else {
mLayers.emplace_back();
mLayers.back().name = layerName;
mIterator = mLayers.end() - 1;
}
}
mIterator->timeMs += timeMs;
++mIterator;
}
void Profiler::print(std::ostream& os) const noexcept {
std::string const nameHdr("Layer");
std::string const timeHdr(" Time (ms)");
std::string const avgHdr(" Avg. Time (ms)");
std::string const percentageHdr(" Time %");
float const totalTimeMs = getTotalTime();
auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) {
return a.name.size() < b.name.size();
};
auto const longestName =
std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
auto const nameLength =
std::max(longestName->name.size() + 1, nameHdr.size());
auto const timeLength = timeHdr.size();
auto const avgLength = avgHdr.size();
auto const percentageLength = percentageHdr.size();
os << std::endl
<< "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
<< std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr
<< std::endl;
for (auto const& p : mLayers) {
// clang-format off
os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs
<< std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100
<< std::endl;
}
{
os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
<< totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
// clang-format on
}
os << std::endl;
}
void Profiler::exportJSONProfile(std::string const& fileName) const noexcept {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl
<< " { \"count\" : " << mUpdatesCount << " }" << std::endl;
auto const totalTimeMs = getTotalTime();
for (auto const& l : mLayers) {
// clang-format off
os << ", {" << " \"name\" : \"" << l.name << "\""
", \"timeMs\" : " << l.timeMs
<< ", \"averageMs\" : " << l.timeMs / mUpdatesCount
<< ", \"percentage\" : " << l.timeMs / totalTimeMs * 100
<< " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Input Tensors:" << std::endl;
bindings.dumpInputs(context, os);
}
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Output Tensors:" << std::endl;
bindings.dumpOutputs(context, os);
}
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch) {
std::ofstream os(fileName, std::ofstream::trunc);
std::string sep = " ";
auto const output = bindings.getOutputBindings();
os << "[" << std::endl;
for (auto const& binding : output) {
// clang-format off
os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl;
sep = ", ";
os << " " << sep << "\"dimensions\" : \"";
bindings.dumpBindingDimensions(binding.second, context, os);
os << "\"" << std::endl;
os << " " << sep << "\"values\" : [ ";
bindings.dumpBindingValues(context, binding.second, os, sep, batch);
os << " ]" << std::endl << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
} // namespace sample

View File

@@ -1,211 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H
#include <functional>
#include <iostream>
#include "NvInfer.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime {
InferenceTime(float q, float i, float c, float o, float e)
: enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}
InferenceTime() = default;
InferenceTime(InferenceTime const&) = default;
InferenceTime(InferenceTime&&) = default;
InferenceTime& operator=(InferenceTime const&) = default;
InferenceTime& operator=(InferenceTime&&) = default;
~InferenceTime() = default;
float enq{0}; // Enqueue
float h2d{0}; // Host to Device
float compute{0}; // Compute
float d2h{0}; // Device to Host
float e2e{0}; // end to end
// ideal latency
float latency() const { return h2d + compute + d2h; }
};
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace {
InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
float ce, float os, float oe)
: stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}
InferenceTrace() = default;
InferenceTrace(InferenceTrace const&) = default;
InferenceTrace(InferenceTrace&&) = default;
InferenceTrace& operator=(InferenceTrace const&) = default;
InferenceTrace& operator=(InferenceTrace&&) = default;
~InferenceTrace() = default;
int32_t stream{0};
float enqStart{0};
float enqEnd{0};
float h2dStart{0};
float h2dEnd{0};
float computeStart{0};
float computeEnd{0};
float d2hStart{0};
float d2hEnd{0};
};
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
a.d2h + b.d2h, a.e2e + b.e2e);
}
inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
return a = a + b;
}
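// Illustrative helper (not part of the original header): the two operators
// above are what let reporting code fold per-iteration timings into a total.
inline InferenceTime totalOf(std::vector<InferenceTime> const& timings) {
  InferenceTime total;  // all fields start at 0
  for (auto const& t : timings) {
    total += t;  // operator+= delegates to the field-wise operator+
  }
  return total;  // total.latency() is then the summed h2d + compute + d2h
}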
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult {
float min{0};
float max{0};
float mean{0};
float median{0};
float percentile{0};
float coeffVar{0}; // coefficient of variation
};
//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float walltime, std::ostream& os);
//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os);
//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float percentile,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile);
//!
//! \brief Print the explanations of the performance metrics printed in
//! printEpilog() function.
//!
void printMetricExplanations(std::ostream& os);
//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
ReportingOptions const& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName);
//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch);
//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile {
std::string name;
float timeMs{0};
};
//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in
//! the same order
//!
class Profiler : public nvinfer1::IProfiler {
public:
void reportLayerTime(char const* layerName, float timeMs) noexcept override;
void print(std::ostream& os) const noexcept;
//!
//! \brief Export a profile to JSON file
//!
void exportJSONProfile(std::string const& fileName) const noexcept;
private:
float getTotalTime() const noexcept {
auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
return accumulator + lp.timeMs;
};
    return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
}
std::vector<LayerProfile> mLayers;
std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
int32_t mUpdatesCount{0};
};
} // namespace sample
#endif // TRT_SAMPLE_REPORTING_H
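For reference, a minimal sketch of how this Profiler is typically wired up (assuming an existing nvinfer1::IExecutionContext* named context and a populated bindings array; execution must be synchronous for per-layer times to be reported):

  sample::Profiler profiler;
  context->setProfiler(&profiler);       // TensorRT invokes reportLayerTime()
  for (int i = 0; i < 10; ++i) {
    context->executeV2(bindings);        // synchronous execute
  }
  profiler.print(std::cout);             // formatted per-layer table
  profiler.exportJSONProfile("layer_profile.json");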

View File

@@ -1,494 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <unordered_map>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include "NvInfer.h"
#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"
namespace sample {
inline int dataTypeSize(nvinfer1::DataType dataType) {
switch (dataType) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
template <typename T> inline T roundUp(T m, T n) {
return ((m + n - 1) / n) * n;
}
inline int volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
int vecDim, int comps, int batch) {
int maxNbElems = 1;
for (int i = 0; i < dims.nbDims; ++i) {
// Get effective length of axis.
int d = dims.d[i];
    // If any dimension is 0, this is an empty tensor.
if (d == 0) {
return 0;
}
if (i == vecDim) {
d = samplesCommon::divUp(d, comps);
}
maxNbElems = std::max(maxNbElems, d * strides.d[i]);
}
return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
if (vecDim != -1) {
dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
}
return volume(dims) * std::max(batch, 1);
}
inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
sample::gLogWarning
<< "Vector too long, only first 8 elements are used in dimension."
<< std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
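// Worked example (added for illustration):
//   nvinfer1::Dims d = toDims({1, 3, 224, 224});
//   // d.nbDims == 4 and d.d[0..3] == {1, 3, 224, 224}; a vector longer than
//   // nvinfer1::Dims::MAX_DIMS is truncated with the warning above.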
template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
if (std::is_integral<T>::value) {
std::uniform_int_distribution<int> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
} else {
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
}
// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
H* typedBuffer = static_cast<H*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<H>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <>
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
__half max) {
fillBufferHalf(buffer, volume, min, max);
}
template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator,
std::ostream& os, const Dims& dims, const Dims& strides,
int32_t vectorDim, int32_t spv) {
const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1,
std::multiplies<int64_t>());
const T* typedBuffer = static_cast<const T*>(buffer);
std::string sep;
for (int64_t v = 0; v < volume; ++v) {
int64_t curV = v;
int32_t dataOffset = 0;
for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
int32_t dimVal = curV % dims.d[dimIndex];
if (dimIndex == vectorDim) {
dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
} else {
dataOffset +=
dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
}
curV /= dims.d[dimIndex];
ASSERT(curV >= 0);
}
os << sep << typedBuffer[dataOffset];
sep = separator;
}
}
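// Worked example (added for clarity): with dims {2, 3}, strides {3, 1} and
// vectorDim == -1, the loop visits v = 0..5 and emits typedBuffer[0..5] in
// row-major order; on a vectorized axis, (dimVal / spv) steps in whole
// vectors while (dimVal % spv) indexes the component inside one vector.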
inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
ASSERT(dst);
std::ifstream file(fileName, std::ios::in | std::ios::binary);
if (file.is_open()) {
file.read(dst, size);
file.close();
} else {
std::stringstream msg;
msg << "Cannot open file " << fileName << "!";
throw std::invalid_argument(msg.str());
}
}
struct Binding {
bool isInput{false};
std::unique_ptr<IMirroredBuffer> buffer;
int64_t volume{0};
nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
void fill(const std::string& fileName) {
loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
buffer->getSize());
}
void fill() {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
break;
}
case nvinfer1::DataType::kINT32: {
fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kINT8: {
fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kFLOAT: {
fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
case nvinfer1::DataType::kHALF: {
fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
}
}
void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
int32_t spv, const std::string separator = " ") const {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT32: {
dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT8: {
dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kFLOAT: {
dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kHALF: {
dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
}
}
};
class Bindings {
public:
Bindings() = delete;
explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
nvinfer1::DataType dataType,
const std::string& fileName = "") {
while (mBindings.size() <= static_cast<size_t>(b)) {
mBindings.emplace_back();
mDevicePointers.emplace_back();
}
mNames[name] = b;
if (mBindings[b].buffer == nullptr) {
if (mUseManaged) {
mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
} else {
mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
}
}
mBindings[b].isInput = isInput;
    // Some memory allocators return nullptr when allocating zero bytes, but
    // TensorRT requires a non-null pointer even for empty tensors, so
    // allocate a dummy byte.
if (volume == 0) {
mBindings[b].buffer->allocate(1);
} else {
mBindings[b].buffer->allocate(
static_cast<size_t>(volume) *
static_cast<size_t>(dataTypeSize(dataType)));
}
mBindings[b].volume = volume;
mBindings[b].dataType = dataType;
mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
if (isInput) {
if (fileName.empty()) {
fill(b);
} else {
fill(b, fileName);
}
}
}
void** getDeviceBuffers() { return mDevicePointers.data(); }
void transferInputToDevice(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (mBindings[b.second].isInput) {
mBindings[b.second].buffer->hostToDevice(stream);
}
}
}
void transferOutputToHost(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (!mBindings[b.second].isInput) {
mBindings[b.second].buffer->deviceToHost(stream);
}
}
}
void fill(int binding, const std::string& fileName) {
mBindings[binding].fill(fileName);
}
void fill(int binding) { mBindings[binding].fill(); }
void dumpBindingDimensions(int binding,
const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
const auto dims = context.getBindingDimensions(binding);
// Do not add a newline terminator, because the caller may be outputting a
// JSON string.
os << dims;
}
void dumpBindingValues(const nvinfer1::IExecutionContext& context,
int binding, std::ostream& os,
const std::string& separator = " ",
int32_t batch = 1) const {
Dims dims = context.getBindingDimensions(binding);
Dims strides = context.getStrides(binding);
int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
const int32_t spv =
context.getEngine().getBindingComponentsPerElement(binding);
if (context.getEngine().hasImplicitBatchDimension()) {
auto insertN = [](Dims& d, int32_t bs) {
const int32_t nbDims = d.nbDims;
ASSERT(nbDims < Dims::MAX_DIMS);
std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
d.d[0] = bs;
d.nbDims = nbDims + 1;
};
int32_t batchStride = 0;
for (int32_t i = 0; i < strides.nbDims; ++i) {
if (strides.d[i] * dims.d[i] > batchStride) {
batchStride = strides.d[i] * dims.d[i];
}
}
insertN(dims, batch);
insertN(strides, batchStride);
vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
}
mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
}
void dumpInputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isInput = [](const Binding& b) { return b.isInput; };
dumpBindings(context, isInput, os);
}
void dumpOutputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
dumpBindings(context, isOutput, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto all = [](const Binding& b) { return true; };
dumpBindings(context, all, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
bool (*predicate)(const Binding& b),
std::ostream& os) const {
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
os << n.first << ": (";
dumpBindingDimensions(binding, context, os);
os << ")" << std::endl;
dumpBindingValues(context, binding, os);
os << std::endl;
}
}
}
std::unordered_map<std::string, int> getInputBindings() const {
auto isInput = [](const Binding& b) { return b.isInput; };
return getBindings(isInput);
}
std::unordered_map<std::string, int> getOutputBindings() const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
return getBindings(isOutput);
}
std::unordered_map<std::string, int> getBindings() const {
auto all = [](const Binding& b) { return true; };
return getBindings(all);
}
std::unordered_map<std::string, int>
getBindings(bool (*predicate)(const Binding& b)) const {
std::unordered_map<std::string, int> bindings;
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
bindings.insert(n);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> mNames;
std::vector<Binding> mBindings;
std::vector<void*> mDevicePointers;
bool mUseManaged{false};
};
template <typename T> struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
size_t nbBindings, bool isInput = true) {
bool broadcast = formats.size() == 1;
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
if (!formats.empty() && !validFormatsCount) {
if (isInput) {
throw std::invalid_argument(
"The number of inputIOFormats must match network's inputs or be one "
"for broadcasting.");
} else {
throw std::invalid_argument(
"The number of outputIOFormats must match network's outputs or be "
"one for broadcasting.");
}
}
return broadcast;
}
inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
if (!iFile) {
sample::gLogWarning << "Could not read timing cache from: " << inFileName
<< ". A new timing cache will be generated and written."
<< std::endl;
return std::vector<char>();
}
iFile.seekg(0, std::ifstream::end);
size_t fsize = iFile.tellg();
iFile.seekg(0, std::ifstream::beg);
std::vector<char> content(fsize);
iFile.read(content.data(), fsize);
iFile.close();
sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
<< inFileName << std::endl;
return content;
}
inline void saveTimingCacheFile(const std::string outFileName,
const IHostMemory* blob) {
std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
if (!oFile) {
sample::gLogWarning << "Could not write timing cache to: " << outFileName
<< std::endl;
return;
}
oFile.write((char*)blob->data(), blob->size());
oFile.close();
sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
<< outFileName << std::endl;
}
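// Usage sketch (illustrative, TensorRT 8-style API; "config" is assumed to be
// an existing nvinfer1::IBuilderConfig*):
//   std::vector<char> blob = loadTimingCacheFile("timing.cache");
//   auto* cache = config->createTimingCache(blob.data(), blob.size());
//   config->setTimingCache(*cache, false /*ignoreMismatch*/);
//   // ... build the engine, which populates the cache ...
//   std::unique_ptr<nvinfer1::IHostMemory> data{cache->serialize()};
//   saveTimingCacheFile("timing.cache", data.get());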
inline int32_t getCudaDriverVersion() {
int32_t version{-1};
cudaCheck(cudaDriverGetVersion(&version));
return version;
}
inline int32_t getCudaRuntimeVersion() {
int32_t version{-1};
cudaCheck(cudaRuntimeGetVersion(&version));
return version;
}
} // namespace sample
#endif // TRT_SAMPLE_UTILS_H

View File

@@ -1,568 +0,0 @@
/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
#ifdef REPLACE_GETOPT
int opterr = 1; /* if error message should be printed */
int optind = 1; /* index into parent argv vector */
int optopt = '?'; /* character checked for validity */
#undef optreset /* see getopt.h */
#define optreset __mingw_optreset
int optreset; /* reset getopt */
char* optarg; /* argument associated with option */
#endif
#define PRINT_ERROR ((opterr) && (*options != ':'))
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
/* return values */
#define BADCH (int) '?'
#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
#define INORDER (int) 1
#ifndef __CYGWIN__
#define __progname __argv[0]
#else
extern char __declspec(dllimport) * __progname;
#endif
#ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char* const*, const char*, const struct option*, int*, int);
static int parse_long_options(char* const*, const char*, const struct option*, int*, int);
static int gcd(int, int);
static void permute_args(int, int, int, char* const*);
static char* place = EMSG; /* option letter processing */
/* XXX: set optreset to 1 rather than these two */
static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
static const char recargchar[] = "option requires an argument -- %c";
static const char recargstring[] = "option requires an argument -- %s";
static const char ambig[] = "ambiguous option -- %.*s";
static const char noarg[] = "option doesn't take an argument -- %.*s";
static const char illoptchar[] = "unknown option -- %c";
static const char illoptstring[] = "unknown option -- %s";
static void _vwarnx(const char* fmt, va_list ap)
{
(void) fprintf(stderr, "%s: ", __progname);
if (fmt != NULL)
(void) vfprintf(stderr, fmt, ap);
(void) fprintf(stderr, "\n");
}
static void warnx(const char* fmt, ...)
{
va_list ap;
va_start(ap, fmt);
_vwarnx(fmt, ap);
va_end(ap);
}
/*
* Compute the greatest common divisor of a and b.
*/
static int gcd(int a, int b)
{
int c;
c = a % b;
while (c != 0)
{
a = b;
b = c;
c = a % b;
}
return (b);
}
/*
* Exchange the block from nonopt_start to nonopt_end with the block
* from nonopt_end to opt_end (keeping the same order of arguments
* in each block).
*/
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
{
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
char* swap;
/*
* compute lengths of blocks and number and size of cycles
*/
nnonopts = panonopt_end - panonopt_start;
nopts = opt_end - panonopt_end;
ncycle = gcd(nnonopts, nopts);
cyclelen = (opt_end - panonopt_start) / ncycle;
for (i = 0; i < ncycle; i++)
{
cstart = panonopt_end + i;
pos = cstart;
for (j = 0; j < cyclelen; j++)
{
if (pos >= panonopt_end)
pos -= nnonopts;
else
pos += nopts;
swap = nargv[pos];
/* LINTED const cast */
((char**) nargv)[pos] = nargv[cstart];
/* LINTED const cast */
((char**) nargv)[cstart] = swap;
}
}
}
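/*
 * Worked example (added for clarity, not in the original source): with
 * argv = { prog, file1, file2, -a, -b }, nonopt_start = 1, nonopt_end = 3
 * and opt_end = 5, we get nnonopts = 2, nopts = 2, ncycle = gcd(2, 2) = 2
 * and cyclelen = 2; the two swap cycles then yield
 * { prog, -a, -b, file1, file2 }, preserving order inside each block.
 */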
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*/
static int parse_long_options(
char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too)
{
char *current_argv, *has_equal;
size_t current_argv_len;
int i, ambiguous, match;
#define IDENTICAL_INTERPRETATION(_x, _y) \
(long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \
&& long_options[(_x)].val == long_options[(_y)].val)
current_argv = place;
match = -1;
ambiguous = 0;
optind++;
if ((has_equal = strchr(current_argv, '=')) != NULL)
{
/* argument found (--option=arg) */
current_argv_len = has_equal - current_argv;
has_equal++;
}
else
current_argv_len = strlen(current_argv);
for (i = 0; long_options[i].name; i++)
{
/* find matching long option */
if (strncmp(current_argv, long_options[i].name, current_argv_len))
continue;
if (strlen(long_options[i].name) == current_argv_len)
{
/* exact match */
match = i;
ambiguous = 0;
break;
}
/*
* If this is a known short option, don't allow
* a partial match of a single character.
*/
if (short_too && current_argv_len == 1)
continue;
if (match == -1) /* partial match */
match = i;
else if (!IDENTICAL_INTERPRETATION(i, match))
ambiguous = 1;
}
if (ambiguous)
{
/* ambiguous abbreviation */
if (PRINT_ERROR)
warnx(ambig, (int) current_argv_len, current_argv);
optopt = 0;
return (BADCH);
}
if (match != -1)
{ /* option found */
if (long_options[match].has_arg == no_argument && has_equal)
{
if (PRINT_ERROR)
warnx(noarg, (int) current_argv_len, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
return (BADARG);
}
if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
{
if (has_equal)
optarg = has_equal;
else if (long_options[match].has_arg == required_argument)
{
/*
* optional argument doesn't use next nargv
*/
optarg = nargv[optind++];
}
}
if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
{
/*
* Missing argument; leading ':' indicates no error
* should be generated.
*/
if (PRINT_ERROR)
warnx(recargstring, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
--optind;
return (BADARG);
}
}
else
{ /* unknown option */
if (short_too)
{
--optind;
return (-1);
}
if (PRINT_ERROR)
warnx(illoptstring, current_argv);
optopt = 0;
return (BADCH);
}
if (idx)
*idx = match;
if (long_options[match].flag)
{
*long_options[match].flag = long_options[match].val;
return (0);
}
else
return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
/*
* getopt_internal --
* Parse argc/argv argument vector. Called by user level routines.
*/
static int getopt_internal(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags)
{
const char* oli; /* option letter list index */
int optchar, short_too;
static int posixly_correct = -1;
if (options == NULL)
return (-1);
/*
* XXX Some GNU programs (like cvs) set optind to 0 instead of
* XXX using optreset. Work around this braindamage.
*/
if (optind == 0)
optind = optreset = 1;
/*
* Disable GNU extensions if POSIXLY_CORRECT is set or options
* string begins with a '+'.
*
* CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
* optreset != 0 for GNU compatibility.
*/
if (posixly_correct == -1 || optreset != 0)
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
if (*options == '-')
flags |= FLAG_ALLARGS;
else if (posixly_correct || *options == '+')
flags &= ~FLAG_PERMUTE;
if (*options == '+' || *options == '-')
options++;
optarg = NULL;
if (optreset)
nonopt_start = nonopt_end = -1;
start:
if (optreset || !*place)
{ /* update scanning pointer */
optreset = 0;
if (optind >= nargc)
{ /* end of argument vector */
place = EMSG;
if (nonopt_end != -1)
{
/* do permutation, if we have to */
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
else if (nonopt_start != -1)
{
/*
* If we skipped non-options, set optind
* to the first of them.
*/
optind = nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
{
place = EMSG; /* found non-option */
if (flags & FLAG_ALLARGS)
{
/*
* GNU extension:
* return non-option as argument to option 1
*/
optarg = nargv[optind++];
return (INORDER);
}
if (!(flags & FLAG_PERMUTE))
{
/*
* If no permutation wanted, stop parsing
* at first non-option.
*/
return (-1);
}
/* do permutation */
if (nonopt_start == -1)
nonopt_start = optind;
else if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
nonopt_start = optind - (nonopt_end - nonopt_start);
nonopt_end = -1;
}
optind++;
/* process next argument */
goto start;
}
if (nonopt_start != -1 && nonopt_end == -1)
nonopt_end = optind;
/*
* If we have "-" do nothing, if "--" we are done.
*/
if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
{
optind++;
place = EMSG;
/*
* We found an option (--), so if we skipped
* non-options, we have to permute.
*/
if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
}
/*
* Check long options if:
* 1) we were passed some
* 2) the arg is not just "-"
* 3) either the arg starts with -- or we are getopt_long_only()
*/
if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
{
short_too = 0;
if (*place == '-')
place++; /* --foo long option */
else if (*place != ':' && strchr(options, *place) != NULL)
short_too = 1; /* could be short option too */
optchar = parse_long_options(nargv, options, long_options, idx, short_too);
if (optchar != -1)
{
place = EMSG;
return (optchar);
}
}
if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
|| (oli = strchr(options, optchar)) == NULL)
{
/*
* If the user specified "-" and '-' isn't listed in
* options, return -1 (non-option) as per POSIX.
* Otherwise, it is an unknown option character (or ':').
*/
if (optchar == (int) '-' && *place == '\0')
return (-1);
if (!*place)
++optind;
if (PRINT_ERROR)
warnx(illoptchar, optchar);
optopt = optchar;
return (BADCH);
}
if (long_options != NULL && optchar == 'W' && oli[1] == ';')
{
/* -W long-option */
if (*place) /* no space */
/* NOTHING */;
else if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else /* white space */
place = nargv[optind];
optchar = parse_long_options(nargv, options, long_options, idx, 0);
place = EMSG;
return (optchar);
}
if (*++oli != ':')
{ /* doesn't take argument */
if (!*place)
++optind;
}
else
{ /* takes (optional) argument */
optarg = NULL;
if (*place) /* no white space */
optarg = place;
else if (oli[1] != ':')
{ /* arg not optional */
if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else
optarg = nargv[optind];
}
place = EMSG;
++optind;
}
/* dump back option letter */
return (optchar);
}
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int getopt(int nargc, char* const* nargv, const char* options)
{
/*
* We don't pass FLAG_PERMUTE to getopt_internal() since
* the BSD getopt(3) (unlike GNU) has never done this.
*
* Furthermore, since many privileged programs call getopt()
* before dropping privileges it makes sense to keep things
* as simple (and bug-free) as possible.
*/
return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */
/*
* getopt_long --
* Parse argc/argv argument vector.
*/
int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
}
/*
* getopt_long_only --
* Parse argc/argv argument vector.
*/
int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
}

View File

@@ -1,124 +0,0 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file has no copyright assigned and is placed in the Public Domain.
* This file is a part of the w64 mingw-runtime package.
*
* The w64 mingw-runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#define __GETOPT_H__
/* All the headers include this file. */
#include <crtdefs.h>
#if defined(WINGETOPT_SHARED_LIB)
#if defined(BUILDING_WINGETOPT_DLL)
#define WINGETOPT_API __declspec(dllexport)
#else
#define WINGETOPT_API __declspec(dllimport)
#endif
#else
#define WINGETOPT_API
#endif
#ifdef __cplusplus
extern "C"
{
#endif
WINGETOPT_API extern int optind; /* index of first non-option in argv */
WINGETOPT_API extern int optopt; /* single option character, as parsed */
WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
/* (user may set to zero, to suppress) */
WINGETOPT_API extern char* optarg; /* pointer to argument of current option */
extern int getopt(int nargc, char* const* nargv, const char* options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
#define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C"
{
#endif
struct option /* specification for a long form option... */
{
const char* name; /* option name, without leading hyphens */
int has_arg; /* does it take an argument? */
int* flag; /* where to save its status, or NULL */
int val; /* its associated status value */
};
enum /* permitted values for its `has_arg' field... */
{
no_argument = 0, /* option never takes an argument */
required_argument, /* option always requires an argument */
optional_argument /* option may take an argument */
};
extern int getopt_long(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
extern int getopt_long_only(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
#define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
}
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */

View File

@@ -13,12 +13,17 @@
 // limitations under the License.
 #include "fastdeploy/backends/tensorrt/trt_backend.h"
+#include <cstring>
+#include "NvInferSafeRuntime.h"
 #include "fastdeploy/utils/utils.h"
 #ifdef ENABLE_PADDLE_FRONTEND
 #include "paddle2onnx/converter.h"
 #endif
 namespace fastdeploy {
+FDTrtLogger* FDTrtLogger::logger = nullptr;
 size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
   if (dtype == nvinfer1::DataType::kFLOAT) {
     return sizeof(float);
@@ -130,8 +135,8 @@ bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
   fin.seekg(0, std::ios::beg);
   fin.read(&(engine_buffer.at(0)), engine_buffer.size());
   fin.close();
-  SampleUniquePtr<IRuntime> runtime{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
+  FDUniquePtr<nvinfer1::IRuntime> runtime{
+      nvinfer1::createInferRuntime(*FDTrtLogger::Get())};
   if (!runtime) {
     FDERROR << "Failed to call createInferRuntime()." << std::endl;
     return false;
@@ -139,7 +144,7 @@ bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
   engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
       runtime->deserializeCudaEngine(engine_buffer.data(),
                                      engine_buffer.size()),
-      samplesCommon::InferDeleter());
+      FDInferDeleter());
   if (!engine_) {
     FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
     return false;
@@ -320,10 +325,10 @@ void TrtBackend::GetInputOutputInfo() {
     auto dtype = engine_->getBindingDataType(i);
     if (engine_->bindingIsInput(i)) {
       inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
-      inputs_buffer_[name] = DeviceBuffer(dtype);
+      inputs_buffer_[name] = FDDeviceBuffer(dtype);
     } else {
       outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
-      outputs_buffer_[name] = DeviceBuffer(dtype);
+      outputs_buffer_[name] = FDDeviceBuffer(dtype);
     }
   }
   bindings_.resize(num_binds);
@@ -334,7 +339,7 @@ void TrtBackend::AllocateBufferInDynamicShape(
   for (const auto& item : inputs) {
     auto idx = engine_->getBindingIndex(item.name.c_str());
     std::vector<int> shape(item.shape.begin(), item.shape.end());
-    auto dims = sample::toDims(shape);
+    auto dims = ToDims(shape);
     context_->setBindingDimensions(idx, dims);
     if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
       inputs_buffer_[item.name].resize(dims);
@@ -357,7 +362,7 @@ void TrtBackend::AllocateBufferInDynamicShape(
       (*outputs)[ori_idx].shape.assign(output_dims.d,
                                        output_dims.d + output_dims.nbDims);
       (*outputs)[ori_idx].name = outputs_desc_[i].name;
-      (*outputs)[ori_idx].data.resize(volume(output_dims) *
+      (*outputs)[ori_idx].data.resize(Volume(output_dims) *
                                       TrtDataTypeSize(outputs_desc_[i].dtype));
       if ((*outputs)[ori_idx].Nbytes() >
           outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
@@ -373,19 +378,19 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
       1U << static_cast<uint32_t>(
           nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
-  builder_ = SampleUniquePtr<nvinfer1::IBuilder>(
-      nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+  builder_ = FDUniquePtr<nvinfer1::IBuilder>(
+      nvinfer1::createInferBuilder(*FDTrtLogger::Get()));
   if (!builder_) {
     FDERROR << "Failed to call createInferBuilder()." << std::endl;
     return false;
   }
-  network_ = SampleUniquePtr<nvinfer1::INetworkDefinition>(
+  network_ = FDUniquePtr<nvinfer1::INetworkDefinition>(
       builder_->createNetworkV2(explicitBatch));
   if (!network_) {
     FDERROR << "Failed to call createNetworkV2()." << std::endl;
     return false;
   }
-  auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(
+  auto config = FDUniquePtr<nvinfer1::IBuilderConfig>(
       builder_->createBuilderConfig());
   if (!config) {
     FDERROR << "Failed to call createBuilderConfig()." << std::endl;
@@ -402,8 +407,8 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
     }
   }
-  parser_ = SampleUniquePtr<nvonnxparser::IParser>(
-      nvonnxparser::createParser(*network_, sample::gLogger.getTRTLogger()));
+  parser_ = FDUniquePtr<nvonnxparser::IParser>(
+      nvonnxparser::createParser(*network_, *FDTrtLogger::Get()));
   if (!parser_) {
     FDERROR << "Failed to call createParser()." << std::endl;
     return false;
@@ -429,7 +434,7 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
       // set min shape
       FDASSERT(profile->setDimensions(item.first.c_str(),
                                       nvinfer1::OptProfileSelector::kMIN,
-                                      sample::toDims(item.second)),
+                                      ToDims(item.second)),
               "[TrtBackend] Failed to set min_shape for input: %s in TrtBackend.", item.first.c_str());
       // set optimization shape
@@ -438,7 +443,7 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
               "[TrtBackend] Cannot find input name: %s in TrtBackendOption::opt_shape.", item.first.c_str());
       FDASSERT(profile->setDimensions(item.first.c_str(),
                                       nvinfer1::OptProfileSelector::kOPT,
-                                      sample::toDims(iter->second)),
+                                      ToDims(iter->second)),
               "[TrtBackend] Failed to set opt_shape for input: %s in TrtBackend.", item.first.c_str());
       // set max shape
       iter = option.max_shape.find(item.first);
@@ -446,21 +451,21 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
               "[TrtBackend] Cannot find input name: %s in TrtBackendOption::max_shape.", item.first);
       FDASSERT(profile->setDimensions(item.first.c_str(),
                                       nvinfer1::OptProfileSelector::kMAX,
-                                      sample::toDims(iter->second)),
+                                      ToDims(iter->second)),
               "[TrtBackend] Failed to set max_shape for input: %s in TrtBackend.", item.first);
     }
     config->addOptimizationProfile(profile);
   }
-  SampleUniquePtr<IHostMemory> plan{
+  FDUniquePtr<nvinfer1::IHostMemory> plan{
       builder_->buildSerializedNetwork(*network_, *config)};
   if (!plan) {
     FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
     return false;
   }
-  SampleUniquePtr<IRuntime> runtime{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
+  FDUniquePtr<nvinfer1::IRuntime> runtime{
+      nvinfer1::createInferRuntime(*FDTrtLogger::Get())};
   if (!runtime) {
     FDERROR << "Failed to call createInferRuntime()." << std::endl;
     return false;
@@ -468,7 +473,7 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
   engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
       runtime->deserializeCudaEngine(plan->data(), plan->size()),
-      samplesCommon::InferDeleter());
+      FDInferDeleter());
   if (!engine_) {
     FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
     return false;

View File

@@ -20,19 +20,12 @@
 #include <vector>
 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/backends/tensorrt/utils.h"
-#include "fastdeploy/backends/tensorrt/common/argsParser.h"
-#include "fastdeploy/backends/tensorrt/common/buffers.h"
-#include "fastdeploy/backends/tensorrt/common/common.h"
-#include "fastdeploy/backends/tensorrt/common/logger.h"
-#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
-#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
 #include <cuda_runtime_api.h>
+#include "NvOnnxParser.h"
 #include "NvInfer.h"
 namespace fastdeploy {
-using namespace samplesCommon;
 struct TrtValueInfo {
   std::string name;
@@ -86,15 +79,15 @@ class TrtBackend : public BaseBackend {
 private:
   std::shared_ptr<nvinfer1::ICudaEngine> engine_;
   std::shared_ptr<nvinfer1::IExecutionContext> context_;
-  SampleUniquePtr<nvonnxparser::IParser> parser_;
-  SampleUniquePtr<nvinfer1::IBuilder> builder_;
-  SampleUniquePtr<nvinfer1::INetworkDefinition> network_;
+  FDUniquePtr<nvonnxparser::IParser> parser_;
+  FDUniquePtr<nvinfer1::IBuilder> builder_;
+  FDUniquePtr<nvinfer1::INetworkDefinition> network_;
   cudaStream_t stream_{};
   std::vector<void*> bindings_;
   std::vector<TrtValueInfo> inputs_desc_;
   std::vector<TrtValueInfo> outputs_desc_;
-  std::map<std::string, DeviceBuffer> inputs_buffer_;
-  std::map<std::string, DeviceBuffer> outputs_buffer_;
+  std::map<std::string, FDDeviceBuffer> inputs_buffer_;
+  std::map<std::string, FDDeviceBuffer> outputs_buffer_;
   // Sometimes while the number of outputs > 1
   // the output order of tensorrt may not be same

View File

@@ -0,0 +1,199 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <algorithm>
#include <cuda_runtime_api.h>
#include "NvInfer.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
struct FDInferDeleter {
template<typename T> void operator()(T* obj) const {
delete obj;
}
};
template<typename T> using FDUniquePtr = std::unique_ptr<T, FDInferDeleter>;
inline uint32_t GetElementSize(nvinfer1::DataType t) noexcept {
switch (t) {
case nvinfer1::DataType::kINT32:
return 4;
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
inline int64_t Volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
}
inline nvinfer1::Dims ToDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
FDWARNING << "Vector too long, only first 8 elements are used in dimension." << std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
public:
//!
//! \brief Construct an empty buffer.
//!
explicit FDGenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
FDGenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size), mCapacity(size), mType(type) {
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc();
}
}
FDGenericBuffer(FDGenericBuffer&& buf)
: mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
mBuffer(buf.mBuffer) {
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
FDGenericBuffer& operator=(FDGenericBuffer&& buf) {
if (this != &buf) {
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data() { return mBuffer; }
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const { return mBuffer; }
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const { return mSize; }
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const {
return this->size() * GetElementSize(mType);
}
//!
  //! \brief Resizes the buffer. Reallocation occurs only when the new size
  //! exceeds the current capacity; otherwise only the logical size changes.
//!
void resize(size_t newSize) {
mSize = newSize;
if (mCapacity < newSize) {
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims) {
return this->resize(Volume(dims));
}
~FDGenericBuffer() { freeFn(mBuffer); }
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class FDDeviceAllocator {
public:
bool operator()(void** ptr, size_t size) const {
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class FDDeviceFree {
public:
void operator()(void* ptr) const { cudaFree(ptr); }
};
using FDDeviceBuffer = FDGenericBuffer<FDDeviceAllocator, FDDeviceFree>;
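// Usage sketch (illustrative): size a device buffer from binding dims and
// grow it lazily; resize() only reallocates when capacity is exceeded.
//   FDDeviceBuffer buf(Volume(ToDims({1, 3, 224, 224})),
//                      nvinfer1::DataType::kFLOAT);
//   buf.resize(ToDims({4, 3, 224, 224}));  // larger volume: reallocates
//   buf.resize(ToDims({2, 3, 224, 224}));  // fits in capacity: no realloc
//   bindings[idx] = buf.data();            // raw device pointer for TensorRT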
class FDTrtLogger : public nvinfer1::ILogger {
public:
static FDTrtLogger* logger;
static FDTrtLogger* Get() {
if (logger != nullptr) {
return logger;
}
logger = new FDTrtLogger();
return logger;
}
void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override {
if (severity == nvinfer1::ILogger::Severity::kINFO) {
FDINFO << msg << std::endl;
} else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
FDWARNING << msg << std::endl;
} else if (severity == nvinfer1::ILogger::Severity::kERROR) {
FDERROR << msg << std::endl;
} else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
FDASSERT(false, "%s", msg);
}
}
};
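// As the trt_backend.cc changes above show, this singleton is what gets
// handed to TensorRT's factory entry points, e.g.:
//   auto* runtime = nvinfer1::createInferRuntime(*FDTrtLogger::Get());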
} // namespace fastdeploy

View File

@@ -122,7 +122,7 @@ void FDTensor::PrintInfo(const std::string& prefix) {
         "PrintInfo function doesn't support current situation, maybe you "
         "need enhance this function now.");
   }
-  std::cout << prefix << ": shape=";
+  std::cout << prefix << ": name=" << name << ", shape=";
   for (int i = 0; i < shape.size(); ++i) {
     std::cout << shape[i] << " ";
   }