[Bug Fix] fix trt backend page-locked error (#2095)

* [Bug Fix] fix trt backend page-locked error

* Update trt_backend.cc
This commit is contained in:
DefTruth
2023-07-11 13:49:47 +08:00
committed by GitHub
parent 4c1e80b723
commit cf1ff2077d

View File

@@ -470,15 +470,31 @@ void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
if (item.dtype == FDDataType::INT64) {
int64_t* data = static_cast<int64_t*>(const_cast<void*>(item.Data()));
std::vector<int32_t> casted_data(data, data + item.Numel());
FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(),
// FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(),
// static_cast<void*>(casted_data.data()),
// item.Nbytes() / 2, cudaMemcpyHostToDevice,
// stream_) == 0,
// "Error occurs while copy memory from CPU to GPU.");
// WARN: For the cudaMemcpyHostToDevice direction, cudaMemcpyAsync needs page-locked
// (pinned) host memory for any overlap to occur. FDTensor does not currently guarantee
// that its host buffer is page-locked, so a synchronous cudaMemcpy is used here. Reference:
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#creation-and-destruction
FDASSERT(cudaMemcpy(inputs_device_buffer_[item.name].data(),
static_cast<void*>(casted_data.data()),
item.Nbytes() / 2, cudaMemcpyHostToDevice,
stream_) == 0,
item.Nbytes() / 2, cudaMemcpyHostToDevice) == 0,
"Error occurs while copy memory from CPU to GPU.");
} else {
FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(),
// FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(),
// item.Data(), item.Nbytes(),
// cudaMemcpyHostToDevice, stream_) == 0,
// "Error occurs while copy memory from CPU to GPU.");
// WARN: For the cudaMemcpyHostToDevice direction, cudaMemcpyAsync needs page-locked
// (pinned) host memory for any overlap to occur. FDTensor does not currently guarantee
// that its host buffer is page-locked, so a synchronous cudaMemcpy is used here. Reference:
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#creation-and-destruction
FDASSERT(cudaMemcpy(inputs_device_buffer_[item.name].data(),
item.Data(), item.Nbytes(),
cudaMemcpyHostToDevice, stream_) == 0,
cudaMemcpyHostToDevice) == 0,
"Error occurs while copy memory from CPU to GPU.");
}
}