Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Other] Optimize code style (#1032)
* Optimize code
* optimize code
* optimize code
* fix compile error
@@ -17,108 +17,106 @@
namespace fastdeploy {

nvinfer1::PluginFieldCollection AdaptivePool2dPluginCreator::mFC{};
-std::vector<nvinfer1::PluginField> AdaptivePool2dPluginCreator::mPluginAttributes;
+std::vector<nvinfer1::PluginField>
+    AdaptivePool2dPluginCreator::mPluginAttributes;

-pluginStatus_t AdaptivePool2dInference(cudaStream_t stream, int32_t n, const void* input, void* output);
+pluginStatus_t AdaptivePool2dInference(cudaStream_t stream, int32_t n,
+                                       const void* input, void* output);

-AdaptivePool2d::AdaptivePool2d(std::vector<int32_t> output_size, std::string pooling_type) {
+AdaptivePool2d::AdaptivePool2d(std::vector<int32_t> output_size,
+                               std::string pooling_type) {
  output_size_ = output_size;
  pooling_type_ = pooling_type;
}

AdaptivePool2d::AdaptivePool2d(const void* buffer, size_t length) {
-  const char *d = reinterpret_cast<const char*>(buffer), *a = d;
-  output_size_.resize(4);
-  for(int64_t i =0 ; i < 4; i++){
-    output_size_[i] =read<int32_t>(d);
-  }
-  if(read<int32_t>(d) == 0){
-    pooling_type_ = "avg";
-  }else{
-    pooling_type_ = "max";
-  }
-  FDASSERT(d == a + length, "deserialize failed.");
+  const char *d = reinterpret_cast<const char*>(buffer), *a = d;
+  output_size_.resize(4);
+  for (int64_t i = 0; i < 4; i++) {
+    output_size_[i] = read<int32_t>(d);
+  }
+  if (read<int32_t>(d) == 0) {
+    pooling_type_ = "avg";
+  } else {
+    pooling_type_ = "max";
+  }
+  FDASSERT(d == a + length, "deserialize failed.");
}

-int AdaptivePool2d::getNbOutputs() const noexcept {
-  return 1;
-}
+int AdaptivePool2d::getNbOutputs() const noexcept { return 1; }

nvinfer1::DimsExprs AdaptivePool2d::getOutputDimensions(
-    int outputIndex, const nvinfer1::DimsExprs* inputs,
-    int nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept {
+    int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
+    nvinfer1::IExprBuilder& exprBuilder) noexcept {
  try {
    nvinfer1::DimsExprs output(inputs[0]);
    output.d[2] = exprBuilder.constant(static_cast<int32_t>(output_size_[2]));
    output.d[3] = exprBuilder.constant(static_cast<int32_t>(output_size_[3]));
    return output;
-  }
-  catch (const std::exception& e) {
-    FDASSERT(false, "getOutputDimensions failed: %s.",e.what());
+  } catch (const std::exception& e) {
+    FDASSERT(false, "getOutputDimensions failed: %s.", e.what());
  }
  return nvinfer1::DimsExprs{};
}

-int AdaptivePool2d::enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
-                            const nvinfer1::PluginTensorDesc* outputDesc,
-                            const void* const* inputs,
-                            void* const* outputs,
-                            void* workspace,
-                            cudaStream_t stream) noexcept {
+int AdaptivePool2d::enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
+                            const nvinfer1::PluginTensorDesc* outputDesc,
+                            const void* const* inputs, void* const* outputs,
+                            void* workspace, cudaStream_t stream) noexcept {
  if (inputDesc[0].type != nvinfer1::DataType::kFLOAT) {
    return -1;
  }
  auto const* data = static_cast<float const*>(inputs[0]);
  auto* result = static_cast<float*>(outputs[0]);
-  int nums = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] * outputDesc[0].dims.d[2]* outputDesc[0].dims.d[3];
+  int nums = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] *
+             outputDesc[0].dims.d[2] * outputDesc[0].dims.d[3];
  std::vector<int64_t> input_size, output_size;
-  for(int i =0; i< 4; i++){
+  for (int i = 0; i < 4; i++) {
    input_size.push_back(inputDesc[0].dims.d[i]);
    output_size.push_back(outputDesc[0].dims.d[i]);
  }
-  CudaAdaptivePool(input_size, output_size, result, data, stream, pooling_type_);
+  CudaAdaptivePool(input_size, output_size, result, data, stream,
+                   pooling_type_);
  return cudaPeekAtLastError();
}
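Editor's note, not part of this commit: enqueue simply hands the NCHW input to the project's CudaAdaptivePool kernel. A minimal CPU reference of adaptive average pooling is sketched below, assuming the conventional window arithmetic (output row i covers input rows floor(i*H/h) up to ceil((i+1)*H/h)); the function name and layout here are illustrative, not FastDeploy API.

#include <cstdint>
#include <vector>

// Illustrative CPU reference only; the real work is done by CudaAdaptivePool.
void AdaptiveAvgPool2dRef(const std::vector<int64_t>& in_shape,   // {N, C, H, W}
                          const std::vector<int64_t>& out_shape,  // {N, C, h, w}
                          const float* input, float* output) {
  int64_t N = in_shape[0], C = in_shape[1], H = in_shape[2], W = in_shape[3];
  int64_t h = out_shape[2], w = out_shape[3];
  for (int64_t n = 0; n < N; ++n) {
    for (int64_t c = 0; c < C; ++c) {
      const float* in = input + (n * C + c) * H * W;
      float* out = output + (n * C + c) * h * w;
      for (int64_t oy = 0; oy < h; ++oy) {
        int64_t y0 = oy * H / h, y1 = ((oy + 1) * H + h - 1) / h;  // window rows
        for (int64_t ox = 0; ox < w; ++ox) {
          int64_t x0 = ox * W / w, x1 = ((ox + 1) * W + w - 1) / w;  // window cols
          float sum = 0.f;
          for (int64_t y = y0; y < y1; ++y)
            for (int64_t x = x0; x < x1; ++x) sum += in[y * W + x];
          out[oy * w + ox] = sum / ((y1 - y0) * (x1 - x0));
        }
      }
    }
  }
}

With a 1x3x224x224 input and output_size {1, 3, 7, 7}, each output cell averages a 32x32 input window.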

size_t AdaptivePool2d::getSerializationSize() const noexcept {
-  return 5 * sizeof(int32_t) ;
+  return 5 * sizeof(int32_t);
}

void AdaptivePool2d::serialize(void* buffer) const noexcept {
  char *d = reinterpret_cast<char*>(buffer), *a = d;
-  for(int64_t i=0; i< 4; i++){
+  for (int64_t i = 0; i < 4; i++) {
    write(d, output_size_[i]);
  }
  int32_t pooling_type_val = 0;
-  if(pooling_type_ != "avg"){
+  if (pooling_type_ != "avg") {
    pooling_type_val = 1;
  }
  write(d, pooling_type_val);
  FDASSERT(d == a + getSerializationSize(), "d == a + getSerializationSize()");
}
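An aside, not part of the diff: the serialized blob is exactly five int32_t values, the four output_size entries followed by a 0/1 pooling-type flag (0 = "avg", 1 = "max"), which is why getSerializationSize() returns 5 * sizeof(int32_t) = 20 bytes. A self-contained round-trip sketch, with local stand-ins for the write<T>/read<T> helpers declared in common.h further down:

#include <cstdint>
#include <cstring>
#include <vector>

// Stand-ins for the cursor-advancing write<T>/read<T> helpers from common.h.
template <typename T> void write(char*& buffer, const T& val) {
  std::memcpy(buffer, &val, sizeof(T));
  buffer += sizeof(T);
}
template <typename T> T read(const char*& buffer) {
  T val{};
  std::memcpy(&val, buffer, sizeof(T));
  buffer += sizeof(T);
  return val;
}

// 4 dims + 1 pooling flag = 5 * sizeof(int32_t) = 20 bytes.
void RoundTripExample() {
  std::vector<int32_t> output_size = {1, 3, 7, 7};
  char blob[5 * sizeof(int32_t)];
  char* w = blob;
  for (int32_t v : output_size) write(w, v);
  write(w, int32_t{1});  // pooling type: 0 = avg, 1 = max

  const char* r = blob;
  for (int i = 0; i < 4; ++i) output_size[i] = read<int32_t>(r);
  bool is_max = read<int32_t>(r) != 0;
  (void)is_max;
}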

-nvinfer1::DataType AdaptivePool2d::getOutputDataType(
-    int index, const nvinfer1::DataType* inputType, int nbInputs) const noexcept {
+nvinfer1::DataType
+AdaptivePool2d::getOutputDataType(int index,
+                                  const nvinfer1::DataType* inputType,
+                                  int nbInputs) const noexcept {
  return inputType[0];
}

bool AdaptivePool2d::supportsFormatCombination(
-    int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, int nbOutputs) noexcept {
+    int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs,
+    int nbOutputs) noexcept {
  return (inOut[pos].format == nvinfer1::PluginFormat::kLINEAR);
}

-int AdaptivePool2d::initialize() noexcept {
-  return 0;
-}
+int AdaptivePool2d::initialize() noexcept { return 0; }

-void AdaptivePool2d::terminate() noexcept {
-  return;
-}
+void AdaptivePool2d::terminate() noexcept { return; }

-size_t AdaptivePool2d::getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
-                                        int nbInputs,
-                                        const nvinfer1::PluginTensorDesc* outputs,
-                                        int nbOutputs) const noexcept {
+size_t AdaptivePool2d::getWorkspaceSize(
+    const nvinfer1::PluginTensorDesc* inputs, int nbInputs,
+    const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const noexcept {
  return 0;
}

@@ -126,33 +124,32 @@ const char* AdaptivePool2d::getPluginType() const noexcept {
  return "AdaptivePool2d";
}

-const char* AdaptivePool2d::getPluginVersion() const noexcept {
-  return "1";
-}
+const char* AdaptivePool2d::getPluginVersion() const noexcept { return "1"; }

-void AdaptivePool2d::destroy() noexcept {
+void AdaptivePool2d::destroy() noexcept { return; }

+void AdaptivePool2d::configurePlugin(
+    const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
+    const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept {
+  return;
+}
-void AdaptivePool2d::configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
-                                     const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept {
-  return;
-}

nvinfer1::IPluginV2DynamicExt* AdaptivePool2d::clone() const noexcept {
-  try{
-    nvinfer1::IPluginV2DynamicExt* plugin = new AdaptivePool2d(output_size_, pooling_type_);
-    plugin->setPluginNamespace(mNamespace.c_str());
-    return plugin;
-  }
-  catch (std::exception const& e){
-    FDASSERT(false, "clone failed: %s.",e.what());
+  try {
+    nvinfer1::IPluginV2DynamicExt* plugin =
+        new AdaptivePool2d(output_size_, pooling_type_);
+    plugin->setPluginNamespace(mNamespace.c_str());
+    return plugin;
+  } catch (std::exception const& e) {
+    FDASSERT(false, "clone failed: %s.", e.what());
  }
  return nullptr;
}

AdaptivePool2dPluginCreator::AdaptivePool2dPluginCreator() {
  mPluginAttributes.clear();
-  mPluginAttributes.emplace_back(nvinfer1::PluginField("output_size", nullptr, nvinfer1::PluginFieldType::kINT32, 4));
-  mPluginAttributes.emplace_back(nvinfer1::PluginField("pooling_type", nullptr, nvinfer1::PluginFieldType::kCHAR, 3));
+  mPluginAttributes.emplace_back(nvinfer1::PluginField(
+      "output_size", nullptr, nvinfer1::PluginFieldType::kINT32, 4));
+  mPluginAttributes.emplace_back(nvinfer1::PluginField(
+      "pooling_type", nullptr, nvinfer1::PluginFieldType::kCHAR, 3));

  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();

@@ -166,17 +163,18 @@ const char* AdaptivePool2dPluginCreator::getPluginVersion() const noexcept {
  return "1";
}

-const nvinfer1::PluginFieldCollection* AdaptivePool2dPluginCreator::getFieldNames() noexcept {
+const nvinfer1::PluginFieldCollection*
+AdaptivePool2dPluginCreator::getFieldNames() noexcept {
  return &mFC;
}

-nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(const char* name,
-                                                                         const nvinfer1::PluginFieldCollection* fc) noexcept {
-  try{
+nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(
+    const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept {
+  try {
    const nvinfer1::PluginField* fields = fc->fields;
    auto const dims = static_cast<int32_t const*>(fields[0].data);
    output_size_.resize(4);
-    for(int64_t i = 0; i < 4; i++){
+    for (int64_t i = 0; i < 4; i++) {
      output_size_[i] = dims[i];
    }

@@ -184,23 +182,20 @@ nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(const c
    std::string pooling_type(pooling_type_ptr, 3);
    pooling_type_ = pooling_type;
    return new AdaptivePool2d(output_size_, pooling_type_);
-  }
-  catch (std::exception const& e){
-    FDASSERT(false, "createPlugin failed: %s.",e.what());
+  } catch (std::exception const& e) {
+    FDASSERT(false, "createPlugin failed: %s.", e.what());
  }
  return nullptr;
}

-nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::deserializePlugin(const char* name,
-                                                                              const void* serialData,
-                                                                              size_t serialLength) noexcept {
-  try{
+nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::deserializePlugin(
+    const char* name, const void* serialData, size_t serialLength) noexcept {
+  try {
    return new AdaptivePool2d(serialData, serialLength);
-  }
-  catch (std::exception const& e){
-    FDASSERT(false, "deserializePlugin failed: %s.",e.what());
+  } catch (std::exception const& e) {
+    FDASSERT(false, "deserializePlugin failed: %s.", e.what());
  }
  return nullptr;
}

}  // namespace fastdeploy
@@ -13,98 +13,93 @@
// limitations under the License.

#pragma once
-#include "common.h"  // NOLINT
#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
+#include "common.h"  // NOLINT

namespace fastdeploy {

class AdaptivePool2d : public BasePlugin {
 public:
  AdaptivePool2d(std::vector<int32_t> output_size, std::string pooling_type);

  AdaptivePool2d(const void* buffer, size_t length);

  ~AdaptivePool2d() override = default;

  int getNbOutputs() const noexcept override;

-  nvinfer1::DimsExprs getOutputDimensions(
-      int outputIndex,
-      const nvinfer1::DimsExprs* inputs,
-      int nbInputs,
-      nvinfer1::IExprBuilder& exprBuilder) noexcept override;
+  nvinfer1::DimsExprs
+  getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs* inputs,
+                      int nbInputs,
+                      nvinfer1::IExprBuilder& exprBuilder) noexcept override;

-  nvinfer1::DataType getOutputDataType(
-      int index,
-      const nvinfer1::DataType* inputType,
-      int nbInputs) const noexcept override;
+  nvinfer1::DataType getOutputDataType(int index,
+                                       const nvinfer1::DataType* inputType,
+                                       int nbInputs) const noexcept override;

-  bool supportsFormatCombination(
-      int pos,
-      const nvinfer1::PluginTensorDesc* inOut,
-      int nbInputs,
-      int nbOutputs) noexcept override;
+  bool supportsFormatCombination(int pos,
+                                 const nvinfer1::PluginTensorDesc* inOut,
+                                 int nbInputs, int nbOutputs) noexcept override;

  int initialize() noexcept override;

  void terminate() noexcept override;

  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc* outputs,
                          int nbOutputs) const noexcept override;

-  int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
-              const nvinfer1::PluginTensorDesc* outputDesc,
-              const void* const* inputs,
-              void* const* outputs,
-              void* workspace,
-              cudaStream_t stream) noexcept override;
+  int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
+              const nvinfer1::PluginTensorDesc* outputDesc,
+              const void* const* inputs, void* const* outputs, void* workspace,
+              cudaStream_t stream) noexcept override;

  size_t getSerializationSize() const noexcept override;

  void serialize(void* buffer) const noexcept override;

  const char* getPluginType() const noexcept override;

  const char* getPluginVersion() const noexcept override;

  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc* out,
                       int nbOutputs) noexcept override;

  void destroy() noexcept override;

  nvinfer1::IPluginV2DynamicExt* clone() const noexcept override;

 private:
  std::vector<int32_t> output_size_;
  std::string pooling_type_;
};

class AdaptivePool2dPluginCreator : public BaseCreator {
 public:
  AdaptivePool2dPluginCreator();

  ~AdaptivePool2dPluginCreator() override = default;

  const char* getPluginName() const noexcept override;

  const char* getPluginVersion() const noexcept override;

  const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override;

-  nvinfer1::IPluginV2DynamicExt* createPlugin(const char* name,
-      const nvinfer1::PluginFieldCollection* fc) noexcept override;
+  nvinfer1::IPluginV2DynamicExt*
+  createPlugin(const char* name,
+               const nvinfer1::PluginFieldCollection* fc) noexcept override;

-  nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name,
-                                                   const void* serialData,
-                                                   size_t serialLength) noexcept override;
+  nvinfer1::IPluginV2DynamicExt*
+  deserializePlugin(const char* name, const void* serialData,
+                    size_t serialLength) noexcept override;

 private:
  static nvinfer1::PluginFieldCollection mFC;
  static std::vector<nvinfer1::PluginField> mPluginAttributes;
  std::vector<int32_t> output_size_;
  std::string pooling_type_;
};

REGISTER_TENSORRT_PLUGIN(AdaptivePool2dPluginCreator);
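Side note, not part of this commit: REGISTER_TENSORRT_PLUGIN registers the creator with TensorRT's global plugin registry at static-initialization time, which is what lets the ONNX parser and user code find the plugin by name and version. A minimal lookup sketch, assuming a linked TensorRT runtime and that the creator reports the same "AdaptivePool2d"/"1" pair returned by getPluginType()/getPluginVersion() above:

#include "NvInferRuntimeCommon.h"

// Illustrative only: check that the creator registered above is visible to
// TensorRT's plugin registry under its name/version pair.
bool AdaptivePool2dPluginIsRegistered() {
  auto* creator = getPluginRegistry()->getPluginCreator("AdaptivePool2d", "1");
  return creator != nullptr;
}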
@@ -17,40 +17,40 @@
#include "NvInferPlugin.h"
#include "NvInferRuntimeCommon.h"
#include "fastdeploy/utils/utils.h"
+#include <cstring>
#include <iostream>
+#include <memory>
+#include <sstream>
#include <string>
#include <vector>
-#include <memory>
-#include <cstring>
-#include <sstream>

namespace fastdeploy {

class BasePlugin : public nvinfer1::IPluginV2DynamicExt {
 protected:
  void setPluginNamespace(const char* libNamespace) noexcept override {
    mNamespace = libNamespace;
  }

  const char* getPluginNamespace() const noexcept override {
    return mNamespace.c_str();
  }

  std::string mNamespace;
};

class BaseCreator : public nvinfer1::IPluginCreator {
 public:
  void setPluginNamespace(const char* libNamespace) noexcept override {
    mNamespace = libNamespace;
  }

  const char* getPluginNamespace() const noexcept override {
    return mNamespace.c_str();
  }

 protected:
  std::string mNamespace;
};

typedef enum {
@@ -62,19 +62,17 @@ typedef enum {
} pluginStatus_t;

// Write values into buffer
-template <typename T>
-void write(char*& buffer, const T& val) {
-  std::memcpy(buffer, &val, sizeof(T));
-  buffer += sizeof(T);
+template <typename T> void write(char*& buffer, const T& val) {
+  std::memcpy(buffer, &val, sizeof(T));
+  buffer += sizeof(T);
}

// Read values from buffer
-template <typename T>
-T read(const char*& buffer) {
-  T val{};
-  std::memcpy(&val, buffer, sizeof(T));
-  buffer += sizeof(T);
-  return val;
+template <typename T> T read(const char*& buffer) {
+  T val{};
+  std::memcpy(&val, buffer, sizeof(T));
+  buffer += sizeof(T);
+  return val;
}

}  // namespace fastdeploy
@@ -134,9 +134,9 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
  int calibration_cache_size = 0;
  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                           &model_content_ptr, &model_content_size, 11, true,
-                          verbose, true, true, true, ops.data(),
-                          1, "tensorrt",
-                          &calibration_cache_ptr, &calibration_cache_size, "", &save_external_)) {
+                          verbose, true, true, true, ops.data(), 1, "tensorrt",
+                          &calibration_cache_ptr, &calibration_cache_size, "",
+                          &save_external_)) {
    FDERROR << "Error occured while export PaddlePaddle to ONNX format."
            << std::endl;
    return false;
@@ -152,11 +152,11 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
    calibration_str_ = calibration_str;
    delete[] calibration_cache_ptr;
  }
-  if(save_external_){
+  if (save_external_) {
    model_file_name_ = "model.onnx";
    std::fstream f(model_file_name_, std::ios::out);
    FDASSERT(f.is_open(), "Can not open file: %s to save model.",
             model_file_name_.c_str());
    f << onnx_model_proto;
    f.close();
    return InitFromOnnx(model_file_name_, option, false);
@@ -215,13 +215,14 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
  outputs_desc_.resize(onnx_reader.num_outputs);
  for (int i = 0; i < onnx_reader.num_inputs; ++i) {
    std::string name(onnx_reader.inputs[i].name);
-    std::vector<int64_t> shape(
-        onnx_reader.inputs[i].shape,
-        onnx_reader.inputs[i].shape + onnx_reader.inputs[i].rank);
+    std::vector<int64_t> shape(onnx_reader.inputs[i].shape,
+                               onnx_reader.inputs[i].shape +
+                                   onnx_reader.inputs[i].rank);
    inputs_desc_[i].name = name;
    inputs_desc_[i].shape.assign(shape.begin(), shape.end());
    inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype);
-    inputs_desc_[i].original_dtype = ReaderDtypeToFDDtype(onnx_reader.inputs[i].dtype);
+    inputs_desc_[i].original_dtype =
+        ReaderDtypeToFDDtype(onnx_reader.inputs[i].dtype);
    auto info = ShapeRangeInfo(shape);
    info.name = name;
    auto iter_min = option.min_shape.find(name);
@@ -237,9 +238,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,

  for (int i = 0; i < onnx_reader.num_outputs; ++i) {
    std::string name(onnx_reader.outputs[i].name);
-    std::vector<int64_t> shape(
-        onnx_reader.outputs[i].shape,
-        onnx_reader.outputs[i].shape + onnx_reader.outputs[i].rank);
+    std::vector<int64_t> shape(onnx_reader.outputs[i].shape,
+                               onnx_reader.outputs[i].shape +
+                                   onnx_reader.outputs[i].rank);
    outputs_desc_[i].name = name;
    outputs_desc_[i].shape.assign(shape.begin(), shape.end());
    outputs_desc_[i].dtype =
@@ -252,10 +253,10 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
    stream_ = reinterpret_cast<cudaStream_t>(option_.external_stream_);
  } else {
    FDASSERT(cudaStreamCreate(&stream_) == 0,
             "[ERROR] Error occurs while calling cudaStreamCreate().");
  }

-  if(save_external_){
+  if (save_external_) {
    onnx_content.clear();
    onnx_content = model_file_name_;
  }
@@ -283,8 +284,7 @@ int TrtBackend::ShapeRangeInfoUpdated(const std::vector<FDTensor>& inputs) {
}

bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
-                       std::vector<FDTensor>* outputs,
-                       bool copy_to_fd) {
+                       std::vector<FDTensor>* outputs, bool copy_to_fd) {
  if (inputs.size() != NumInputs()) {
    FDERROR << "Require " << NumInputs() << "inputs, but get " << inputs.size()
            << "." << std::endl;
@@ -297,7 +297,8 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
        << "TensorRT engine will be rebuilt once shape range information "
           "changed, this may take lots of time, you can set a proper shape "
           "range before loading model to avoid rebuilding process. refer "
-          "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/faq/"
+          "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/"
+          "faq/"
           "tensorrt_tricks.md for more details."
        << std::endl;
    BuildTrtEngine();
@@ -314,38 +315,42 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
  for (size_t i = 0; i < outputs->size(); ++i) {
    // if the final output tensor's dtype is different from the model output tensor's dtype,
    // then we need cast the data to the final output's dtype
-    auto model_output_dtype = GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype());
+    auto model_output_dtype =
+        GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype());
    if ((*outputs)[i].dtype != model_output_dtype) {
      FDTensor output_tensor;
-      output_tensor.SetExternalData((*outputs)[i].shape, model_output_dtype,
-                                    outputs_device_buffer_[(*outputs)[i].name].data(),
-                                    Device::GPU);
-
-      casted_output_tensors_[(*outputs)[i].name].Resize((*outputs)[i].shape, (*outputs)[i].dtype,
-                                                        (*outputs)[i].name, Device::GPU);
-      function::CudaCast(output_tensor, &casted_output_tensors_[(*outputs)[i].name], stream_);
-      if(!copy_to_fd) {
-        (*outputs)[i].SetExternalData((*outputs)[i].shape, model_output_dtype,
-                                      casted_output_tensors_[(*outputs)[i].name].MutableData(),
-                                      Device::GPU, option_.gpu_id);
+      output_tensor.SetExternalData(
+          (*outputs)[i].shape, model_output_dtype,
+          outputs_device_buffer_[(*outputs)[i].name].data(), Device::GPU);
+
+      casted_output_tensors_[(*outputs)[i].name].Resize(
+          (*outputs)[i].shape, (*outputs)[i].dtype, (*outputs)[i].name,
+          Device::GPU);
+      function::CudaCast(output_tensor,
+                         &casted_output_tensors_[(*outputs)[i].name], stream_);
+      if (!copy_to_fd) {
+        (*outputs)[i].SetExternalData(
+            (*outputs)[i].shape, model_output_dtype,
+            casted_output_tensors_[(*outputs)[i].name].MutableData(),
+            Device::GPU, option_.gpu_id);
      }
    } else {
      casted_output_tensors_[(*outputs)[i].name].SetExternalData(
          (*outputs)[i].shape, model_output_dtype,
-          outputs_device_buffer_[(*outputs)[i].name].data(),
-          Device::GPU);
+          outputs_device_buffer_[(*outputs)[i].name].data(), Device::GPU);
    }
  }
  if (copy_to_fd) {
    for (size_t i = 0; i < outputs->size(); ++i) {
-      FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(),
-                               casted_output_tensors_[(*outputs)[i].name].Data(),
-                               (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
-                               stream_) == 0,
-               "[ERROR] Error occurs while copy memory from GPU to CPU.");
+      FDASSERT(
+          cudaMemcpyAsync((*outputs)[i].Data(),
+                          casted_output_tensors_[(*outputs)[i].name].Data(),
+                          (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
+                          stream_) == 0,
+          "[ERROR] Error occurs while copy memory from GPU to CPU.");
    }
    FDASSERT(cudaStreamSynchronize(stream_) == cudaSuccess,
             "[ERROR] Error occurs while sync cuda stream.");
  }

  return true;
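Background note, not part of the commit: the copy-back branch above follows the usual CUDA pattern of enqueueing asynchronous device-to-host copies on the inference stream and synchronizing once afterwards. A standalone sketch of that pattern using only the plain CUDA runtime API (the function name is illustrative, not FastDeploy code):

#include <cuda_runtime_api.h>
#include <vector>

// Minimal sketch: enqueue a device-to-host copy on a stream, then block once
// until everything submitted to that stream has finished.
bool CopyBackOnStream(const float* device_src, size_t count, cudaStream_t stream) {
  std::vector<float> host_dst(count);
  if (cudaMemcpyAsync(host_dst.data(), device_src, count * sizeof(float),
                      cudaMemcpyDeviceToHost, stream) != cudaSuccess) {
    return false;
  }
  // The async copy may still be in flight; synchronize before touching host_dst.
  return cudaStreamSynchronize(stream) == cudaSuccess;
}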
@@ -356,10 +361,12 @@ void TrtBackend::GetInputOutputInfo() {
  std::unordered_map<std::string, FDDataType> inputs_original_dtype_map;
  std::unordered_map<std::string, FDDataType> outputs_original_dtype_map;
  for (size_t i = 0; i < inputs_desc_.size(); ++i) {
-    inputs_original_dtype_map[inputs_desc_[i].name] = inputs_desc_[i].original_dtype;
+    inputs_original_dtype_map[inputs_desc_[i].name] =
+        inputs_desc_[i].original_dtype;
  }
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    outputs_original_dtype_map[outputs_desc_[i].name] = outputs_desc_[i].original_dtype;
+    outputs_original_dtype_map[outputs_desc_[i].name] =
+        outputs_desc_[i].original_dtype;
  }

  // Re-read the tensor infos from TRT model and write into inputs_desc_ and outputs_desc_
@@ -373,12 +380,18 @@ void TrtBackend::GetInputOutputInfo() {
    auto shape = ToVec(engine_->getBindingDimensions(i));
    auto dtype = engine_->getBindingDataType(i);
    if (engine_->bindingIsInput(i)) {
-      auto original_dtype = inputs_original_dtype_map.count(name) ? inputs_original_dtype_map[name] : GetFDDataType(dtype);
-      inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype, original_dtype});
+      auto original_dtype = inputs_original_dtype_map.count(name)
+                                ? inputs_original_dtype_map[name]
+                                : GetFDDataType(dtype);
+      inputs_desc_.emplace_back(
+          TrtValueInfo{name, shape, dtype, original_dtype});
      inputs_device_buffer_[name] = FDDeviceBuffer(dtype);
    } else {
-      auto original_dtype = outputs_original_dtype_map.count(name) ? outputs_original_dtype_map[name] : GetFDDataType(dtype);
-      outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype, original_dtype});
+      auto original_dtype = outputs_original_dtype_map.count(name)
+                                ? outputs_original_dtype_map[name]
+                                : GetFDDataType(dtype);
+      outputs_desc_.emplace_back(
+          TrtValueInfo{name, shape, dtype, original_dtype});
      outputs_device_buffer_[name] = FDDeviceBuffer(dtype);
      casted_output_tensors_[name] = FDTensor();
    }
@@ -391,8 +404,9 @@ void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
  for (const auto& item : inputs) {
    // auto idx = engine_->getBindingIndex(item.name.c_str());
    auto iter = io_name_index_.find(item.name);
-    FDASSERT(iter != io_name_index_.end(), "TRTBackend SetInputs not find name:%s", item.name.c_str());
+    FDASSERT(iter != io_name_index_.end(),
+             "TRTBackend SetInputs not find name:%s", item.name.c_str());
    auto idx = iter->second;
    std::vector<int> shape(item.shape.begin(), item.shape.end());
    auto dims = ToDims(shape);
    context_->setBindingDimensions(idx, dims);
@@ -424,9 +438,8 @@ void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
             "Error occurs while copy memory from CPU to GPU.");
  } else {
    FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(),
-                            item.Data(),
-                            item.Nbytes(), cudaMemcpyHostToDevice,
-                            stream_) == 0,
+                            item.Data(), item.Nbytes(),
+                            cudaMemcpyHostToDevice, stream_) == 0,
             "Error occurs while copy memory from CPU to GPU.");
  }
}
@@ -443,8 +456,10 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs,
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    // auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
    auto idx_iter = io_name_index_.find(outputs_desc_[i].name);
-    FDASSERT(idx_iter != io_name_index_.end(), "TRTBackend Outputs not find name:%s", outputs_desc_[i].name.c_str());
+    FDASSERT(idx_iter != io_name_index_.end(),
+             "TRTBackend Outputs not find name:%s",
+             outputs_desc_[i].name.c_str());
    auto idx = idx_iter->second;
    auto output_dims = context_->getBindingDimensions(idx);

    // find the original index of output
@@ -457,23 +472,22 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs,

    // Allocate output buffer memory
    outputs_device_buffer_[outputs_desc_[i].name].resize(output_dims);

    // binding output buffer
    bindings_[idx] = outputs_device_buffer_[outputs_desc_[i].name].data();

    // set user's outputs info
    std::vector<int64_t> shape(output_dims.d,
                               output_dims.d + output_dims.nbDims);
-    if(copy_to_fd) {
+    if (copy_to_fd) {
      (*outputs)[ori_idx].is_pinned_memory = option_.enable_pinned_memory;
      (*outputs)[ori_idx].Resize(shape, outputs_desc_[i].original_dtype,
                                 outputs_desc_[i].name);
    } else {
      (*outputs)[ori_idx].name = outputs_desc_[i].name;
      (*outputs)[ori_idx].SetExternalData(
-          shape, outputs_desc_[i].original_dtype,
-          bindings_[idx], Device::GPU,
-          option_.gpu_id);
+          shape, outputs_desc_[i].original_dtype, bindings_[idx], Device::GPU,
+          option_.gpu_id);
    }
  }
}
@@ -587,7 +601,8 @@ bool TrtBackend::BuildTrtEngine() {
  if (option_.serialize_file != "") {
    FDINFO << "Serialize TensorRTEngine to local file "
           << option_.serialize_file << "." << std::endl;
-    std::ofstream engine_file(option_.serialize_file.c_str(), std::ios::binary | std::ios::out);
+    std::ofstream engine_file(option_.serialize_file.c_str(),
+                              std::ios::binary | std::ios::out);
    if (!engine_file) {
      FDERROR << "Failed to open " << option_.serialize_file << " to write."
              << std::endl;
@@ -628,10 +643,11 @@ bool TrtBackend::CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer) {
    return false;
  }
  bool model_parser;
-  if(save_external_){
-    model_parser=!parser_->parseFromFile(onnx_model_buffer.c_str(), 0);
-  }else{
-    model_parser = !parser_->parse(onnx_model_buffer.data(), onnx_model_buffer.size());
+  if (save_external_) {
+    model_parser = !parser_->parseFromFile(onnx_model_buffer.c_str(), 0);
+  } else {
+    model_parser =
+        !parser_->parse(onnx_model_buffer.data(), onnx_model_buffer.size());
  }
  if (model_parser) {
    FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
@@ -665,7 +681,8 @@ bool TrtBackend::CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer) {
         "should be noticed that FastDeploy will rebuild the engine while "
         "new input shape is out of the collected shape range, this may "
         "bring some time consuming problem, refer "
-         "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/faq/"
+         "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/"
+         "faq/"
         "tensorrt_tricks.md for more details."
      << std::endl;
  initialized_ = true;
@@ -721,27 +738,24 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
  return infos;
}

-std::unique_ptr<BaseBackend> TrtBackend::Clone(void *stream, int device_id) {
+std::unique_ptr<BaseBackend> TrtBackend::Clone(void* stream, int device_id) {
  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
  auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
-  if(device_id > 0 && device_id != option_.gpu_id) {
+  if (device_id > 0 && device_id != option_.gpu_id) {
    auto clone_option = option_;
    clone_option.gpu_id = device_id;
    clone_option.external_stream_ = stream;
    if (option_.model_format == ModelFormat::ONNX) {
      FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
               "Clone model from ONNX failed while initialize TrtBackend.");
    } else {
-      FDASSERT(casted_backend->InitFromPaddle(option_.model_file,
-                                              option_.params_file, clone_option),
-               "Clone model from Paddle failed while initialize TrtBackend.");
+      FDASSERT(casted_backend->InitFromPaddle(
+                   option_.model_file, option_.params_file, clone_option),
+               "Clone model from Paddle failed while initialize TrtBackend.");
    }
-    FDWARNING << "The target device id:"
-              << device_id
-              << " is different from current device id:"
-              << option_.gpu_id
-              << ", cannot share memory with current engine."
-              << std::endl;
+    FDWARNING << "The target device id:" << device_id
+              << " is different from current device id:" << option_.gpu_id
+              << ", cannot share memory with current engine." << std::endl;
    return new_backend;
  }
  cudaSetDevice(option_.gpu_id);
@@ -750,12 +764,15 @@ std::unique_ptr<BaseBackend> TrtBackend::Clone(void *stream, int device_id) {
    casted_backend->stream_ = reinterpret_cast<cudaStream_t>(stream);
  } else {
    FDASSERT(cudaStreamCreate(&casted_backend->stream_) == 0,
             "[ERROR] Error occurs while clone calling cudaStreamCreate().");
  }
  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
-  casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
-  casted_backend->outputs_order_.insert(outputs_order_.begin(), outputs_order_.end());
-  casted_backend->shape_range_info_.insert(shape_range_info_.begin(), shape_range_info_.end());
+  casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
+                                       outputs_desc_.end());
+  casted_backend->outputs_order_.insert(outputs_order_.begin(),
+                                        outputs_order_.end());
+  casted_backend->shape_range_info_.insert(shape_range_info_.begin(),
+                                           shape_range_info_.end());
  casted_backend->engine_ = engine_;
  casted_backend->context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
      casted_backend->engine_->createExecutionContext());
@@ -58,7 +58,7 @@ namespace fastdeploy {
struct TrtValueInfo {
  std::string name;
  std::vector<int> shape;
-  nvinfer1::DataType dtype; // dtype of TRT model
+  nvinfer1::DataType dtype;   // dtype of TRT model
  FDDataType original_dtype;  // dtype of original ONNX/Paddle model
};

@@ -97,8 +97,7 @@ class TrtBackend : public BaseBackend {
  bool InitFromOnnx(const std::string& model_file,
                    const TrtBackendOption& option = TrtBackendOption(),
                    bool from_memory_buffer = false);
-  bool Infer(std::vector<FDTensor>& inputs,
-             std::vector<FDTensor>* outputs,
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
             bool copy_to_fd = true) override;

  int NumInputs() const { return inputs_desc_.size(); }
@@ -107,7 +106,7 @@ class TrtBackend : public BaseBackend {
  TensorInfo GetOutputInfo(int index);
  std::vector<TensorInfo> GetInputInfos() override;
  std::vector<TensorInfo> GetOutputInfos() override;
-  std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+  std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
                                     int device_id = -1) override;

  ~TrtBackend() {
@@ -32,17 +32,15 @@
namespace fastdeploy {

struct FDInferDeleter {
-  template <typename T>
-  void operator()(T* obj) const {
+  template <typename T> void operator()(T* obj) const {
    if (obj) {
      delete obj;
      // obj->destroy();
    }
  }
};

-template <typename T>
-using FDUniquePtr = std::unique_ptr<T, FDInferDeleter>;
+template <typename T> using FDUniquePtr = std::unique_ptr<T, FDInferDeleter>;

int64_t Volume(const nvinfer1::Dims& d);

@@ -72,17 +70,13 @@ std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
  return out;
}

-template <typename AllocFunc, typename FreeFunc>
-class FDGenericBuffer {
+template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
 public:
  //!
  //! \brief Construct an empty buffer.
  //!
  explicit FDGenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
-      : mSize(0),
-        mCapacity(0),
-        mType(type),
-        mBuffer(nullptr),
+      : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr),
        mExternal_buffer(nullptr) {}

  //!
@@ -104,9 +98,7 @@ class FDGenericBuffer {
  }

  FDGenericBuffer(FDGenericBuffer&& buf)
-      : mSize(buf.mSize),
-        mCapacity(buf.mCapacity),
-        mType(buf.mType),
+      : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
        mBuffer(buf.mBuffer) {
    buf.mSize = 0;
    buf.mCapacity = 0;
@@ -133,7 +125,8 @@ class FDGenericBuffer {
  //! \brief Returns pointer to underlying array.
  //!
  void* data() {
-    if (mExternal_buffer != nullptr) return mExternal_buffer;
+    if (mExternal_buffer != nullptr)
+      return mExternal_buffer;
    return mBuffer;
  }

@@ -141,7 +134,8 @@ class FDGenericBuffer {
  //! \brief Returns pointer to underlying array.
  //!
  const void* data() const {
-    if (mExternal_buffer != nullptr) return mExternal_buffer;
+    if (mExternal_buffer != nullptr)
+      return mExternal_buffer;
    return mBuffer;
  }

@@ -213,8 +207,8 @@ class FDGenericBuffer {
};

using FDDeviceBuffer = FDGenericBuffer<FDDeviceAllocator, FDDeviceFree>;
-using FDDeviceHostBuffer = FDGenericBuffer<FDDeviceHostAllocator,
-                                           FDDeviceHostFree>;
+using FDDeviceHostBuffer =
+    FDGenericBuffer<FDDeviceHostAllocator, FDDeviceHostFree>;
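Usage note, not from this diff: FDDeviceBuffer is the device-side specialization of FDGenericBuffer that trt_backend.cc uses for its binding buffers. A minimal sketch, assuming only the resize(const nvinfer1::Dims&) and data() members seen in the code above (the function name below is illustrative):

#include "NvInfer.h"

// Illustrative sketch: size a device buffer for one binding and hand its raw
// pointer to TensorRT, mirroring how trt_backend.cc fills bindings_.
void BindOutputExample(fastdeploy::FDDeviceBuffer* buffer, void** bindings, int idx) {
  nvinfer1::Dims dims{};  // e.g. a 1x3x224x224 float tensor
  dims.nbDims = 4;
  dims.d[0] = 1; dims.d[1] = 3; dims.d[2] = 224; dims.d[3] = 224;
  buffer->resize(dims);            // grows the device allocation if needed
  bindings[idx] = buffer->data();  // raw device pointer handed to TensorRT
}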
class FDTrtLogger : public nvinfer1::ILogger {
 public: