From cf1ff2077da62858cb011db4fa3d57650a4d6623 Mon Sep 17 00:00:00 2001 From: DefTruth <31974251+DefTruth@users.noreply.github.com> Date: Tue, 11 Jul 2023 13:49:47 +0800 Subject: [PATCH] [Bug Fix] fix trt backend page-locked error (#2095) * [Bug Fix] fix trt backend page-locked error * Update trt_backend.cc --- .../runtime/backends/tensorrt/trt_backend.cc | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc index 7a14221ab..f1ac6a729 100644 --- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc +++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc @@ -470,16 +470,32 @@ void TrtBackend::SetInputs(const std::vector& inputs) { if (item.dtype == FDDataType::INT64) { int64_t* data = static_cast(const_cast(item.Data())); std::vector casted_data(data, data + item.Numel()); - FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(), - static_cast(casted_data.data()), - item.Nbytes() / 2, cudaMemcpyHostToDevice, - stream_) == 0, - "Error occurs while copy memory from CPU to GPU."); + // FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(), + // static_cast(casted_data.data()), + // item.Nbytes() / 2, cudaMemcpyHostToDevice, + // stream_) == 0, + // "Error occurs while copy memory from CPU to GPU."); + // WARN: For the cudaMemcpyHostToDevice direction, cudaMemcpyAsync needs page-locked host + // memory to prevent any overlap from occurring. The page-locked memory needed by cudaMemcpyAsync + // may not be guaranteed by FDTensor now. 
Reference: + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#creation-and-destruction + FDASSERT(cudaMemcpy(inputs_device_buffer_[item.name].data(), + static_cast(casted_data.data()), + item.Nbytes() / 2, cudaMemcpyHostToDevice) == 0, + "Error occurs while copy memory from CPU to GPU."); } else { - FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(), - item.Data(), item.Nbytes(), - cudaMemcpyHostToDevice, stream_) == 0, - "Error occurs while copy memory from CPU to GPU."); + // FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(), + // item.Data(), item.Nbytes(), + // cudaMemcpyHostToDevice, stream_) == 0, + // "Error occurs while copy memory from CPU to GPU."); + // WARN: For the cudaMemcpyHostToDevice direction, cudaMemcpyAsync needs page-locked host + // memory to prevent any overlap from occurring. The page-locked memory needed by cudaMemcpyAsync + // may not be guaranteed by FDTensor now. Reference: + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#creation-and-destruction + FDASSERT(cudaMemcpy(inputs_device_buffer_[item.name].data(), + item.Data(), item.Nbytes(), + cudaMemcpyHostToDevice) == 0, + "Error occurs while copy memory from CPU to GPU."); } } // binding input buffer