Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 17:17:14 +08:00)
FDTensor support GPU device (#190)
* fdtensor support GPU
* TRT backend support GPU FDTensor
* FDHostAllocator add FASTDEPLOY_DECL
* fix FDTensor Data
* fix FDTensor dtype

Co-authored-by: Jason <jiangjiajun@baidu.com>
@@ -18,15 +18,17 @@
 #include <string>
 #include <vector>

+#include "fastdeploy/core/allocate.h"
 #include "fastdeploy/core/fd_type.h"

 namespace fastdeploy {

 struct FASTDEPLOY_DECL FDTensor {
-  std::vector<int8_t> data;
-  std::vector<int64_t> shape;
+  // std::vector<int8_t> data;
+  void* buffer_ = nullptr;
+  std::vector<int64_t> shape = {0};
   std::string name = "";
-  FDDataType dtype;
+  FDDataType dtype = FDDataType::INT8;

   // This use to skip memory copy step
   // the external_data_ptr will point to the user allocated memory
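This hunk replaces the host-only std::vector<int8_t> data member with a raw buffer_ pointer that can reference either host or device memory, and gives shape and dtype defaults. Below is a minimal standalone sketch of that storage model, not FastDeploy code: the Device and FDDataType enums are stand-ins, and the device member is an assumption here, included only because the AllocFn/FreeFn helpers added later in this commit branch on it.

#include <cstdint>
#include <string>
#include <vector>

// Illustrative stand-ins for fastdeploy::Device and fastdeploy::FDDataType.
enum class Device { CPU, GPU };
enum class FDDataType { INT8, FP32 };

// A device-tagged raw buffer instead of a std::vector<int8_t> that always lives on the host.
struct TensorStorageSketch {
  void* buffer_ = nullptr;              // may point at host or device memory
  std::vector<int64_t> shape = {0};     // same default as the diff
  std::string name = "";
  FDDataType dtype = FDDataType::INT8;  // same default as the diff
  Device device = Device::CPU;          // assumed member; the allocation helpers branch on it
};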
@@ -46,28 +48,32 @@ struct FASTDEPLOY_DECL FDTensor {
   // Get data buffer pointer
   void* MutableData();

-  // Use this data to get the tensor data to process
-  // Since the most senario is process data in CPU
-  // this function weill return a pointer to cpu memory
-  // buffer.
-  // If the original data is on other device, the data
-  // will copy to cpu store in `temporary_cpu_buffer`
   void* Data();

   const void* Data() const;

+  // Use this data to get the tensor data to process
+  // Since the most senario is process data in CPU
+  // this function will return a pointer to cpu memory
+  // buffer.
+  // If the original data is on other device, the data
+  // will copy to cpu store in `temporary_cpu_buffer`
+  const void* CpuData() const;
+
   // Set user memory buffer for Tensor, the memory is managed by
   // the user it self, but the Tensor will share the memory with user
   // So take care with the user buffer
   void SetExternalData(const std::vector<int64_t>& new_shape,
-                       const FDDataType& data_type, void* data_buffer);
+                       const FDDataType& data_type, void* data_buffer,
+                       const Device& new_device = Device::CPU);

   // Initialize Tensor
   // Include setting attribute for tensor
   // and allocate cpu memory buffer
   void Allocate(const std::vector<int64_t>& new_shape,
                 const FDDataType& data_type,
-                const std::string& tensor_name = "");
+                const std::string& tensor_name = "",
+                const Device& new_device = Device::CPU);

   // Total size of tensor memory buffer in bytes
   int Nbytes() const;
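With the new Device parameter, SetExternalData can wrap a user-managed GPU buffer directly, and CpuData() is the documented way to obtain a host-readable view of it. The sketch below is a hedged usage example, not code from the repository: it assumes a FastDeploy build that ships this header and a valid CUDA device pointer d_ptr supplied by the caller, and the shape and dtype are illustrative.

#include "fastdeploy/core/fd_tensor.h"

// Wrap an externally allocated GPU buffer without copying; the caller keeps ownership.
void WrapDeviceBuffer(void* d_ptr) {
  fastdeploy::FDTensor tensor("input");
  tensor.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32, d_ptr,
                         fastdeploy::Device::GPU);

  // Per the comment in the header, CpuData() copies device data into a temporary
  // CPU buffer, so the returned pointer is safe to read on the host.
  const float* host_view = static_cast<const float*>(tensor.CpuData());
  (void)host_view;
}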
@@ -75,13 +81,51 @@ struct FASTDEPLOY_DECL FDTensor {
   // Total number of elements in this tensor
   int Numel() const;

+  void Resize(size_t nbytes);
+
   void Resize(const std::vector<int64_t>& new_shape);

+  void Resize(const std::vector<int64_t>& new_shape,
+              const FDDataType& data_type, const std::string& tensor_name = "",
+              const Device& new_device = Device::CPU);
+
   // Debug function
   // Use this function to print shape, dtype, mean, max, min
   // prefix will also be printed as tag
   void PrintInfo(const std::string& prefix = "TensorInfo: ");

+  bool AllocFn(size_t nbytes) {
+    if (device == Device::GPU) {
+#ifdef WITH_GPU
+      return FDDeviceAllocator()(&buffer_, nbytes);
+#else
+      FDASSERT(false,
+               "The FastDeploy FDTensor allocator didn't compile under "
+               "-DWITH_GPU=ON,"
+               "so this is an unexpected problem happend.");
+#endif
+    }
+    return FDHostAllocator()(&buffer_, nbytes);
+  }
+
+  void FreeFn() {
+    if (external_data_ptr != nullptr) external_data_ptr = nullptr;
+    if (buffer_ != nullptr) {
+      if (device == Device::GPU) {
+#ifdef WITH_GPU
+        FDDeviceFree()(buffer_);
+#endif
+      } else {
+        FDHostFree()(buffer_);
+      }
+      buffer_ = nullptr;
+    }
+  }
+
+  FDTensor() {}
+  explicit FDTensor(const std::string& tensor_name);
+
+  ~FDTensor() { FreeFn(); }
 };

 } // namespace fastdeploy
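The inline AllocFn/FreeFn helpers route memory through FDHostAllocator or FDDeviceAllocator depending on the tensor's device, and the GPU branch only exists in a -DWITH_GPU=ON build. The following is a hedged sketch of driving them through the new Resize overload; it assumes Resize(shape, dtype, name, device) allocates a buffer on the requested device, which is what the helpers above suggest, and the shapes and names are illustrative.

#include "fastdeploy/core/fd_tensor.h"

int main() {
  fastdeploy::FDTensor t("scores");

  // CPU path: AllocFn falls through to FDHostAllocator.
  t.Resize({2, 8}, fastdeploy::FDDataType::FP32, "scores", fastdeploy::Device::CPU);
  float* data = static_cast<float*>(t.MutableData());
  for (int i = 0; i < t.Numel(); ++i) data[i] = 0.f;
  t.PrintInfo("cpu tensor: ");

#ifdef WITH_GPU
  // GPU path: only valid when FastDeploy was compiled with -DWITH_GPU=ON,
  // otherwise AllocFn hits the FDASSERT shown in the diff.
  fastdeploy::FDTensor g("scores_gpu");
  g.Resize({2, 8}, fastdeploy::FDDataType::FP32, "scores_gpu", fastdeploy::Device::GPU);
#endif
  return 0;
}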