Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-07 09:31:35 +08:00)
FDTensor support GPU device (#190)
* fdtensor support GPU
* TRT backend support GPU FDTensor
* FDHostAllocator add FASTDEPLOY_DECL
* fix FDTensor Data
* fix FDTensor dtype

Co-authored-by: Jason <jiangjiajun@baidu.com>
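Before the diff, a minimal usage sketch may help show what this change enables. It is an illustration written against the signatures visible in the diff below, not code from the commit itself; the tensor shape, the FDDataType::FP32 value, and the assumption that the caller already owns a valid CUDA device buffer are all hypothetical.

#include "fastdeploy/core/fd_tensor.h"

// Hypothetical example: wrap an existing GPU buffer and read it back on the host.
// `d_ptr` is assumed to point to 1*3*224*224 floats already resident on the GPU.
void InspectGpuOutput(void* d_ptr) {
  fastdeploy::FDTensor tensor;
  // Record the device pointer without copying; the tensor remembers that the
  // data lives on the GPU (the new `new_device` argument added by this commit).
  tensor.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32, d_ptr,
                         fastdeploy::Device::GPU);
  // CpuData() (and Data() for GPU external data) copies the contents into the
  // tensor's temporary_cpu_buffer so the host can read it.
  const float* host_view = reinterpret_cast<const float*>(tensor.CpuData());
  (void)host_view;  // e.g. inspect or post-process the values here
}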
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/utils/utils.h"
 #ifdef WITH_GPU
@@ -25,55 +26,69 @@ void* FDTensor::MutableData() {
   if (external_data_ptr != nullptr) {
     return external_data_ptr;
   }
-  return data.data();
+  return buffer_;
 }

 void* FDTensor::Data() {
   if (external_data_ptr != nullptr) {
+    if (device == Device::GPU) {
+#ifdef WITH_GPU
+      // Need to copy the CUDA memory to the CPU first.
+      temporary_cpu_buffer.resize(Nbytes());
+      FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
+                          Nbytes(), cudaMemcpyDeviceToHost) == 0,
+               "[ERROR] Error occurred while copying memory from GPU to CPU");
+      return temporary_cpu_buffer.data();
+#else
+      FDASSERT(false,
+               "FastDeploy was not compiled with -DWITH_GPU=ON, so this is "
+               "an unexpected problem.");
+#endif
+    } else {
+      return external_data_ptr;
+    }
-    return external_data_ptr;
   }
-  return data.data();
+  return buffer_;
 }

 const void* FDTensor::Data() const {
   if (external_data_ptr != nullptr) {
     return external_data_ptr;
   }
-  return data.data();
+  return buffer_;
 }

+const void* FDTensor::CpuData() const {
+  if (device == Device::GPU) {
+#ifdef WITH_GPU
+    auto* cpu_ptr = const_cast<std::vector<int8_t>*>(&temporary_cpu_buffer);
+    cpu_ptr->resize(Nbytes());
+    // Need to copy the CUDA memory to the CPU first.
+    if (external_data_ptr != nullptr) {
+      FDASSERT(cudaMemcpy(cpu_ptr->data(), external_data_ptr, Nbytes(),
+                          cudaMemcpyDeviceToHost) == 0,
+               "[ERROR] Error occurred while copying memory from GPU to CPU");
+    } else {
+      FDASSERT(cudaMemcpy(cpu_ptr->data(), buffer_, Nbytes(),
+                          cudaMemcpyDeviceToHost) == 0,
+               "[ERROR] Error occurred while copying buffer memory from GPU to CPU");
+    }
+    return cpu_ptr->data();
+#else
+    FDASSERT(false,
+             "FastDeploy was not compiled with -DWITH_GPU=ON, so this is "
+             "an unexpected problem.");
+#endif
+  }
+  return Data();
+}
+
 void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
-                               const FDDataType& data_type, void* data_buffer) {
+                               const FDDataType& data_type, void* data_buffer,
+                               const Device& new_device) {
   dtype = data_type;
   shape.assign(new_shape.begin(), new_shape.end());
   external_data_ptr = data_buffer;
+  device = new_device;
 }

 void FDTensor::Allocate(const std::vector<int64_t>& new_shape,
                         const FDDataType& data_type,
-                        const std::string& tensor_name) {
+                        const std::string& tensor_name,
+                        const Device& new_device) {
   dtype = data_type;
   name = tensor_name;
   shape.assign(new_shape.begin(), new_shape.end());
-  int unit = FDDataTypeSize(data_type);
-  int total_size =
-      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
-  data.resize(total_size * unit);
+  device = new_device;
+  size_t nbytes = Nbytes();
+  FDASSERT(AllocFn(nbytes),
+           "FastDeploy FDTensor failed to allocate CPU memory");
 }

 int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }
@@ -82,6 +97,44 @@ int FDTensor::Numel() const {
   return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
 }

+void FDTensor::Resize(size_t new_nbytes) {
+  size_t nbytes = Nbytes();
+  if (new_nbytes > nbytes) {
+    FreeFn();
+    AllocFn(new_nbytes);
+  }
+}
+
+void FDTensor::Resize(const std::vector<int64_t>& new_shape) {
+  int numel = Numel();
+  int new_numel = std::accumulate(new_shape.begin(), new_shape.end(), 1,
+                                  std::multiplies<int>());
+  shape.assign(new_shape.begin(), new_shape.end());
+  if (new_numel > numel) {
+    FreeFn();
+    size_t nbytes = new_numel * FDDataTypeSize(dtype);
+    AllocFn(nbytes);
+  }
+}
+
+void FDTensor::Resize(const std::vector<int64_t>& new_shape,
+                      const FDDataType& data_type,
+                      const std::string& tensor_name,
+                      const Device& new_device) {
+  name = tensor_name;
+  device = new_device;
+  size_t nbytes = Nbytes();
+  shape.assign(new_shape.begin(), new_shape.end());
+  dtype = data_type;
+  int new_nbytes = std::accumulate(new_shape.begin(), new_shape.end(), 1,
+                                   std::multiplies<int>()) *
+                   FDDataTypeSize(data_type);
+  if (new_nbytes > nbytes) {
+    FreeFn();
+    AllocFn(new_nbytes);
+  }
+}
+
 template <typename T>
 void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
                          double* min) {