FastDeploy/csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H
#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include "NvInfer.h"
#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"
namespace sample {
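//! Return the size in bytes of one element of the given data type,
//! or 0 for an unrecognized type.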
inline int dataTypeSize(nvinfer1::DataType dataType) {
switch (dataType) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
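//! Round m up to the nearest multiple of n.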
template <typename T> inline T roundUp(T m, T n) {
return ((m + n - 1) / n) * n;
}
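//! Return the number of elements in a densely packed tensor with the given
//! dimensions.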
inline int volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
int vecDim, int comps, int batch) {
int maxNbElems = 1;
for (int i = 0; i < dims.nbDims; ++i) {
// Get effective length of axis.
int d = dims.d[i];
// If any dimension is 0, this is an empty tensor.
if (d == 0) {
return 0;
}
if (i == vecDim) {
d = samplesCommon::divUp(d, comps);
}
maxNbElems = std::max(maxNbElems, d * strides.d[i]);
}
return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
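//! Return the volume of dims with the vectorized dimension (if any) rounded
//! up to a multiple of comps, scaled by max(batch, 1).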
inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
if (vecDim != -1) {
dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
}
return static_cast<int64_t>(volume(dims)) * std::max(batch, 1);
}
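//! Convert a vector of ints into nvinfer1::Dims, keeping at most the first
//! nvinfer1::Dims::MAX_DIMS elements.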
inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
sample::gLogWarning << "Vector too long; only the first " << limit
                    << " elements are used as dimensions." << std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
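//! Fill buffer with volume random values between min and max, using a
//! uniform integer or real distribution depending on T.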
template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
if (std::is_integral<T>::value) {
std::uniform_int_distribution<int> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
} else {
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
}
// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
H* typedBuffer = static_cast<H*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<H>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <>
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
__half max) {
fillBufferHalf(buffer, volume, min, max);
}
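//! Write the elements of buffer to os, delimited by separator, translating
//! each logical index into a physical offset via strides and the vectorized
//! dimension (vectorDim, with spv scalars per vector).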
template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator,
std::ostream& os, const Dims& dims, const Dims& strides,
int32_t vectorDim, int32_t spv) {
const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims,
                                       int64_t{1}, std::multiplies<int64_t>());
const T* typedBuffer = static_cast<const T*>(buffer);
std::string sep;
for (int64_t v = 0; v < volume; ++v) {
int64_t curV = v;
int32_t dataOffset = 0;
for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
int32_t dimVal = curV % dims.d[dimIndex];
if (dimIndex == vectorDim) {
dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
} else {
dataOffset +=
dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
}
curV /= dims.d[dimIndex];
ASSERT(curV >= 0);
}
os << sep << typedBuffer[dataOffset];
sep = separator;
}
}
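//! Read size bytes of fileName into dst, throwing std::invalid_argument if
//! the file cannot be opened.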
inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
ASSERT(dst);
std::ifstream file(fileName, std::ios::in | std::ios::binary);
if (file.is_open()) {
file.read(dst, size);
file.close();
} else {
std::stringstream msg;
msg << "Cannot open file " << fileName << "!";
throw std::invalid_argument(msg.str());
}
}
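//! A single engine binding: a mirrored host/device buffer together with its
//! direction, element count, and data type.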
struct Binding {
bool isInput{false};
std::unique_ptr<IMirroredBuffer> buffer;
int64_t volume{0};
nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
void fill(const std::string& fileName) {
loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
buffer->getSize());
}
void fill() {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
break;
}
case nvinfer1::DataType::kINT32: {
fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kINT8: {
fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kFLOAT: {
fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
case nvinfer1::DataType::kHALF: {
fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
}
}
void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
          int32_t spv, const std::string& separator = " ") const {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT32: {
dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT8: {
dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kFLOAT: {
dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kHALF: {
dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
}
}
};
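//! Owns the mirrored buffers for all bindings of an engine, fills inputs
//! (randomly or from files), and transfers data between host and device
//! around inference.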
class Bindings {
public:
Bindings() = delete;
explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
nvinfer1::DataType dataType,
const std::string& fileName = "") {
while (mBindings.size() <= static_cast<size_t>(b)) {
mBindings.emplace_back();
mDevicePointers.emplace_back();
}
mNames[name] = b;
if (mBindings[b].buffer == nullptr) {
if (mUseManaged) {
mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
} else {
mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
}
}
mBindings[b].isInput = isInput;
// Some memory allocators return nullptr when allocating zero bytes, but
// TensorRT requires a non-null pointer even for empty tensors, so allocate
// a dummy byte.
if (volume == 0) {
mBindings[b].buffer->allocate(1);
} else {
mBindings[b].buffer->allocate(
static_cast<size_t>(volume) *
static_cast<size_t>(dataTypeSize(dataType)));
}
mBindings[b].volume = volume;
mBindings[b].dataType = dataType;
mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
if (isInput) {
if (fileName.empty()) {
fill(b);
} else {
fill(b, fileName);
}
}
}
void** getDeviceBuffers() { return mDevicePointers.data(); }
void transferInputToDevice(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (mBindings[b.second].isInput) {
mBindings[b.second].buffer->hostToDevice(stream);
}
}
}
void transferOutputToHost(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (!mBindings[b.second].isInput) {
mBindings[b.second].buffer->deviceToHost(stream);
}
}
}
void fill(int binding, const std::string& fileName) {
mBindings[binding].fill(fileName);
}
void fill(int binding) { mBindings[binding].fill(); }
void dumpBindingDimensions(int binding,
const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
const auto dims = context.getBindingDimensions(binding);
// Do not add a newline terminator, because the caller may be outputting a
// JSON string.
os << dims;
}
void dumpBindingValues(const nvinfer1::IExecutionContext& context,
int binding, std::ostream& os,
const std::string& separator = " ",
int32_t batch = 1) const {
Dims dims = context.getBindingDimensions(binding);
Dims strides = context.getStrides(binding);
int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
const int32_t spv =
context.getEngine().getBindingComponentsPerElement(binding);
if (context.getEngine().hasImplicitBatchDimension()) {
auto insertN = [](Dims& d, int32_t bs) {
const int32_t nbDims = d.nbDims;
ASSERT(nbDims < Dims::MAX_DIMS);
std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
d.d[0] = bs;
d.nbDims = nbDims + 1;
};
int32_t batchStride = 0;
for (int32_t i = 0; i < strides.nbDims; ++i) {
if (strides.d[i] * dims.d[i] > batchStride) {
batchStride = strides.d[i] * dims.d[i];
}
}
insertN(dims, batch);
insertN(strides, batchStride);
vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
}
mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
}
void dumpInputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isInput = [](const Binding& b) { return b.isInput; };
dumpBindings(context, isInput, os);
}
void dumpOutputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
dumpBindings(context, isOutput, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto all = [](const Binding& b) { return true; };
dumpBindings(context, all, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
bool (*predicate)(const Binding& b),
std::ostream& os) const {
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
os << n.first << ": (";
dumpBindingDimensions(binding, context, os);
os << ")" << std::endl;
dumpBindingValues(context, binding, os);
os << std::endl;
}
}
}
std::unordered_map<std::string, int> getInputBindings() const {
auto isInput = [](const Binding& b) { return b.isInput; };
return getBindings(isInput);
}
std::unordered_map<std::string, int> getOutputBindings() const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
return getBindings(isOutput);
}
std::unordered_map<std::string, int> getBindings() const {
auto all = [](const Binding& b) { return true; };
return getBindings(all);
}
std::unordered_map<std::string, int>
getBindings(bool (*predicate)(const Binding& b)) const {
std::unordered_map<std::string, int> bindings;
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
bindings.insert(n);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> mNames;
std::vector<Binding> mBindings;
std::vector<void*> mDevicePointers;
bool mUseManaged{false};
};
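//! A minimal usage sketch for Bindings, assuming an already-built engine,
//! execution context, and stream (the variable names engine, context, and
//! stream below are illustrative, not part of this header):
//!
//!   Bindings bindings(false /* useManaged */);
//!   for (int b = 0; b < engine->getNbBindings(); ++b) {
//!     bindings.addBinding(b, engine->getBindingName(b),
//!                         engine->bindingIsInput(b),
//!                         volume(engine->getBindingDimensions(b)),
//!                         engine->getBindingDataType(b));
//!   }
//!   bindings.transferInputToDevice(stream);
//!   context->enqueueV2(bindings.getDeviceBuffers(), stream.get(), nullptr);
//!   bindings.transferOutputToHost(stream);

//! Deleter for TensorRT objects created through the create/destroy API;
//! pair it with TrtUniquePtr below for RAII ownership.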
template <typename T> struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
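//! Return true if a single IOFormat should be broadcast across all
//! nbBindings bindings; throws std::invalid_argument when the number of
//! formats is neither one nor nbBindings.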
inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
size_t nbBindings, bool isInput = true) {
bool broadcast = formats.size() == 1;
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
if (!formats.empty() && !validFormatsCount) {
if (isInput) {
throw std::invalid_argument(
"The number of inputIOFormats must match network's inputs or be one "
"for broadcasting.");
} else {
throw std::invalid_argument(
"The number of outputIOFormats must match network's outputs or be "
"one for broadcasting.");
}
}
return broadcast;
}
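//! Read a serialized builder timing cache from inFileName; returns an empty
//! vector and logs a warning if the file cannot be read.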
inline std::vector<char> loadTimingCacheFile(const std::string& inFileName) {
std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
if (!iFile) {
sample::gLogWarning << "Could not read timing cache from: " << inFileName
<< ". A new timing cache will be generated and written."
<< std::endl;
return std::vector<char>();
}
iFile.seekg(0, std::ifstream::end);
size_t fsize = iFile.tellg();
iFile.seekg(0, std::ifstream::beg);
std::vector<char> content(fsize);
iFile.read(content.data(), fsize);
iFile.close();
sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
<< inFileName << std::endl;
return content;
}
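//! Write the serialized timing cache in blob to outFileName, logging a
//! warning on failure.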
inline void saveTimingCacheFile(const std::string& outFileName,
                                const IHostMemory* blob) {
std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
if (!oFile) {
sample::gLogWarning << "Could not write timing cache to: " << outFileName
<< std::endl;
return;
}
oFile.write(static_cast<const char*>(blob->data()), blob->size());
oFile.close();
sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
<< outFileName << std::endl;
}
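//! Return the CUDA driver version, as reported by cudaDriverGetVersion.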
inline int32_t getCudaDriverVersion() {
int32_t version{-1};
cudaCheck(cudaDriverGetVersion(&version));
return version;
}
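//! Return the CUDA runtime version, as reported by cudaRuntimeGetVersion.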
inline int32_t getCudaRuntimeVersion() {
int32_t version{-1};
cudaCheck(cudaRuntimeGetVersion(&version));
return version;
}
} // namespace sample
#endif // TRT_SAMPLE_UTILS_H