Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-07 09:31:35 +08:00)
FDTensor support GPU device (#190)
* fdtensor support GPU
* TRT backend support GPU FDTensor
* FDHostAllocator add FASTDEPLOY_DECL
* fix FDTensor Data
* fix FDTensor dtype

Co-authored-by: Jason <jiangjiajun@baidu.com>
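Before the diff, a minimal usage sketch may help show what this change enables. It is an illustration written against the signatures visible in the diff below, not code from the commit itself; the tensor shape, the FDDataType::FP32 value, and the assumption that the caller already owns a valid CUDA device buffer are all hypothetical.

#include "fastdeploy/core/fd_tensor.h"

// Hypothetical example: wrap an existing GPU buffer and read it back on the host.
// `d_ptr` is assumed to point to 1*3*224*224 floats already resident on the GPU.
void InspectGpuOutput(void* d_ptr) {
  fastdeploy::FDTensor tensor;
  // Record the device pointer without copying; the tensor remembers that the
  // data lives on the GPU (the new `new_device` argument added by this commit).
  tensor.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32, d_ptr,
                         fastdeploy::Device::GPU);
  // CpuData() (and Data() for GPU external data) copies the contents into the
  // tensor's temporary_cpu_buffer so the host can read it.
  const float* host_view = reinterpret_cast<const float*>(tensor.CpuData());
  (void)host_view;  // e.g. inspect or post-process the values here
}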
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/utils/utils.h"
 #ifdef WITH_GPU
@@ -25,55 +26,69 @@ void* FDTensor::MutableData() {
   if (external_data_ptr != nullptr) {
     return external_data_ptr;
   }
-  return data.data();
+  return buffer_;
 }

 void* FDTensor::Data() {
   if (external_data_ptr != nullptr) {
+    if (device == Device::GPU) {
+#ifdef WITH_GPU
+      // Need to copy the CUDA memory to the CPU first.
+      temporary_cpu_buffer.resize(Nbytes());
+      FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
+                          Nbytes(), cudaMemcpyDeviceToHost) == 0,
+               "[ERROR] Error occurred while copying memory from GPU to CPU");
+      return temporary_cpu_buffer.data();
+#else
+      FDASSERT(false,
+               "FastDeploy was not compiled with -DWITH_GPU=ON, so this is "
+               "an unexpected problem.");
+#endif
+    } else {
+      return external_data_ptr;
+    }
-    return external_data_ptr;
   }
-  return data.data();
+  return buffer_;
 }

 const void* FDTensor::Data() const {
   if (external_data_ptr != nullptr) {
     return external_data_ptr;
   }
-  return data.data();
+  return buffer_;
 }

+const void* FDTensor::CpuData() const {
+  if (device == Device::GPU) {
+#ifdef WITH_GPU
+    auto* cpu_ptr = const_cast<std::vector<int8_t>*>(&temporary_cpu_buffer);
+    cpu_ptr->resize(Nbytes());
+    // Need to copy the CUDA memory to the CPU first.
+    if (external_data_ptr != nullptr) {
+      FDASSERT(cudaMemcpy(cpu_ptr->data(), external_data_ptr, Nbytes(),
+                          cudaMemcpyDeviceToHost) == 0,
+               "[ERROR] Error occurred while copying memory from GPU to CPU");
+    } else {
+      FDASSERT(cudaMemcpy(cpu_ptr->data(), buffer_, Nbytes(),
+                          cudaMemcpyDeviceToHost) == 0,
+               "[ERROR] Error occurred while copying buffer memory from GPU to CPU");
+    }
+    return cpu_ptr->data();
+#else
+    FDASSERT(false,
+             "FastDeploy was not compiled with -DWITH_GPU=ON, so this is "
+             "an unexpected problem.");
+#endif
+  }
+  return Data();
+}
+
 void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
-                               const FDDataType& data_type, void* data_buffer) {
+                               const FDDataType& data_type, void* data_buffer,
+                               const Device& new_device) {
   dtype = data_type;
   shape.assign(new_shape.begin(), new_shape.end());
   external_data_ptr = data_buffer;
+  device = new_device;
 }

 void FDTensor::Allocate(const std::vector<int64_t>& new_shape,
                         const FDDataType& data_type,
-                        const std::string& tensor_name) {
+                        const std::string& tensor_name,
+                        const Device& new_device) {
   dtype = data_type;
   name = tensor_name;
   shape.assign(new_shape.begin(), new_shape.end());
-  int unit = FDDataTypeSize(data_type);
-  int total_size =
-      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
-  data.resize(total_size * unit);
+  device = new_device;
+  size_t nbytes = Nbytes();
+  FDASSERT(AllocFn(nbytes),
+           "FastDeploy FDTensor failed to allocate CPU memory");
 }

 int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }
@@ -82,6 +97,44 @@ int FDTensor::Numel() const {
   return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
 }

+void FDTensor::Resize(size_t new_nbytes) {
+  size_t nbytes = Nbytes();
+  if (new_nbytes > nbytes) {
+    FreeFn();
+    AllocFn(new_nbytes);
+  }
+}
+
+void FDTensor::Resize(const std::vector<int64_t>& new_shape) {
+  int numel = Numel();
+  int new_numel = std::accumulate(new_shape.begin(), new_shape.end(), 1,
+                                  std::multiplies<int>());
+  shape.assign(new_shape.begin(), new_shape.end());
+  if (new_numel > numel) {
+    FreeFn();
+    size_t nbytes = new_numel * FDDataTypeSize(dtype);
+    AllocFn(nbytes);
+  }
+}
+
+void FDTensor::Resize(const std::vector<int64_t>& new_shape,
+                      const FDDataType& data_type,
+                      const std::string& tensor_name,
+                      const Device& new_device) {
+  name = tensor_name;
+  device = new_device;
+  size_t nbytes = Nbytes();
+  shape.assign(new_shape.begin(), new_shape.end());
+  dtype = data_type;
+  int new_nbytes = std::accumulate(new_shape.begin(), new_shape.end(), 1,
+                                   std::multiplies<int>()) *
+                   FDDataTypeSize(data_type);
+  if (new_nbytes > nbytes) {
+    FreeFn();
+    AllocFn(new_nbytes);
+  }
+}
+
 template <typename T>
 void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
                          double* min) {