FDTensor supports GPU device (#190)

* FDTensor supports GPU

* TRT backend supports GPU FDTensor

* Add FASTDEPLOY_DECL to FDHostAllocator

* Fix FDTensor Data

* Fix FDTensor dtype

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: heliqi
Date: 2022-09-08 03:53:08 -05:00
Committed by: GitHub
Parent: bc8e9e4dae
Commit: 4d1f264d01
17 changed files with 432 additions and 153 deletions


@@ -18,15 +18,17 @@
 #include <string>
 #include <vector>

+#include "fastdeploy/core/allocate.h"
 #include "fastdeploy/core/fd_type.h"

 namespace fastdeploy {

 struct FASTDEPLOY_DECL FDTensor {
-  std::vector<int8_t> data;
-  std::vector<int64_t> shape;
+  // std::vector<int8_t> data;
+  void* buffer_ = nullptr;
+  std::vector<int64_t> shape = {0};
   std::string name = "";
-  FDDataType dtype;
+  FDDataType dtype = FDDataType::INT8;

   // This is used to skip the memory copy step;
   // external_data_ptr will point to the user-allocated memory
@@ -46,28 +48,32 @@ struct FASTDEPLOY_DECL FDTensor {
   // Get the data buffer pointer
   void* MutableData();

-  // Use this function to get the tensor data for processing.
-  // Since the most common scenario is processing data on the CPU,
-  // this function will return a pointer to a cpu memory
-  // buffer.
-  // If the original data is on another device, the data
-  // will be copied to the cpu and stored in `temporary_cpu_buffer`.
   void* Data();

   const void* Data() const;

+  // Use this function to get the tensor data for processing.
+  // Since the most common scenario is processing data on the CPU,
+  // this function will return a pointer to a cpu memory
+  // buffer.
+  // If the original data is on another device, the data
+  // will be copied to the cpu and stored in `temporary_cpu_buffer`.
+  const void* CpuData() const;
+
   // Set a user memory buffer for the Tensor; the memory is managed by
   // the user itself, but the Tensor will share the memory with the user,
   // so take care with the user buffer.
   void SetExternalData(const std::vector<int64_t>& new_shape,
-                       const FDDataType& data_type, void* data_buffer);
+                       const FDDataType& data_type, void* data_buffer,
+                       const Device& new_device = Device::CPU);

   // Initialize the Tensor,
   // including setting attributes for the tensor
   // and allocating the cpu memory buffer
   void Allocate(const std::vector<int64_t>& new_shape,
                 const FDDataType& data_type,
-                const std::string& tensor_name = "");
+                const std::string& tensor_name = "",
+                const Device& new_device = Device::CPU);

   // Total size of the tensor memory buffer in bytes
   int Nbytes() const;
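Note: the comments above give Data() and the new CpuData() different contracts: Data() returns the buffer wherever it lives (possibly GPU memory), while CpuData() always yields a host pointer, copying into `temporary_cpu_buffer` when needed. Below is a minimal sketch of both accessors plus the device-aware SetExternalData(); it assumes FDDataType::FP32 exists in fd_type.h (only INT8 appears in this diff), and the 2x3 host buffer is purely illustrative:

#include <vector>
#include "fastdeploy/core/fd_tensor.h"

void ExternalDataSketch() {
  // Zero-copy: the tensor shares the caller's buffer instead of owning it,
  // so the buffer must outlive the tensor.
  std::vector<float> host(2 * 3, 1.0f);
  fastdeploy::FDTensor t;
  t.SetExternalData({2, 3}, fastdeploy::FDDataType::FP32,
                    static_cast<void*>(host.data()),
                    fastdeploy::Device::CPU);  // new_device defaults to CPU

  // Data() returns the raw pointer on whatever device holds the data;
  // CpuData() guarantees a host pointer, copying from the GPU if needed.
  const float* view = static_cast<const float*>(t.CpuData());
  (void)view;
}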
@@ -75,13 +81,51 @@ struct FASTDEPLOY_DECL FDTensor {
   // Total number of elements in this tensor
   int Numel() const;

+  void Resize(size_t nbytes);
+
   void Resize(const std::vector<int64_t>& new_shape);

+  void Resize(const std::vector<int64_t>& new_shape,
+              const FDDataType& data_type, const std::string& tensor_name = "",
+              const Device& new_device = Device::CPU);
+
   // Debug function
   // Use this function to print shape, dtype, mean, max, min;
   // the prefix will also be printed as a tag
   void PrintInfo(const std::string& prefix = "TensorInfo: ");

+  bool AllocFn(size_t nbytes) {
+    if (device == Device::GPU) {
+#ifdef WITH_GPU
+      return FDDeviceAllocator()(&buffer_, nbytes);
+#else
+      FDASSERT(false,
+               "The FastDeploy FDTensor allocator was not compiled with "
+               "-DWITH_GPU=ON, so GPU memory cannot be allocated.");
+#endif
+    }
+    return FDHostAllocator()(&buffer_, nbytes);
+  }
+
+  void FreeFn() {
+    if (external_data_ptr != nullptr) external_data_ptr = nullptr;
+    if (buffer_ != nullptr) {
+      if (device == Device::GPU) {
+#ifdef WITH_GPU
+        FDDeviceFree()(buffer_);
+#endif
+      } else {
+        FDHostFree()(buffer_);
+      }
+      buffer_ = nullptr;
+    }
+  }
+
   FDTensor() {}
+  explicit FDTensor(const std::string& tensor_name);
+
+  ~FDTensor() { FreeFn(); }
 };

 }  // namespace fastdeploy
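With AllocFn() dispatching on `device` and the new destructor calling FreeFn(), a tensor can now own a GPU buffer end to end. A short usage sketch under stated assumptions: FDDataType::FP32 and the 1x3x224x224 shape are illustrative choices, and Allocate() is assumed to record `new_device` before allocating, as its signature suggests:

#include "fastdeploy/core/fd_tensor.h"

void GpuAllocateSketch() {
#ifdef WITH_GPU
  fastdeploy::FDTensor t;
  // Allocate() with Device::GPU routes the buffer through FDDeviceAllocator.
  t.Allocate({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
             "input", fastdeploy::Device::GPU);
  t.PrintInfo("gpu tensor: ");  // prints shape, dtype, mean, max, min
#endif
  // In a CPU-only build, requesting Device::GPU trips the FDASSERT in
  // AllocFn(); the GPU buffer is released by ~FDTensor() via FreeFn().
}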