FDTensor supports GPU device (#190)

* FDTensor supports GPU

* TRT backend supports GPU FDTensor

* Add FASTDEPLOY_DECL to FDHostAllocator

* Fix FDTensor Data

* Fix FDTensor dtype

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: heliqi
Date: 2022-09-08 03:53:08 -05:00
Committed by: GitHub
Parent: bc8e9e4dae
Commit: 4d1f264d01
17 changed files with 432 additions and 153 deletions


@@ -18,15 +18,17 @@
 #include <string>
 #include <vector>

+#include "fastdeploy/core/allocate.h"
 #include "fastdeploy/core/fd_type.h"

 namespace fastdeploy {

 struct FASTDEPLOY_DECL FDTensor {
-  std::vector<int8_t> data;
-  std::vector<int64_t> shape;
+  // std::vector<int8_t> data;
+  void* buffer_ = nullptr;
+  std::vector<int64_t> shape = {0};
   std::string name = "";
-  FDDataType dtype;
+  FDDataType dtype = FDDataType::INT8;

   // This is used to skip the memory copy step;
   // external_data_ptr will point to the user-allocated memory
@@ -46,28 +48,32 @@ struct FASTDEPLOY_DECL FDTensor {
   // Get the data buffer pointer
   void* MutableData();

-  // Use this function to get the tensor data for processing.
-  // Since the most common scenario is processing data on the CPU,
-  // this function will return a pointer to a cpu memory
-  // buffer.
-  // If the original data is on another device, the data
-  // will be copied to the cpu and stored in `temporary_cpu_buffer`.
   void* Data();

   const void* Data() const;

+  // Use this function to get the tensor data for processing.
+  // Since the most common scenario is processing data on the CPU,
+  // this function will return a pointer to a cpu memory
+  // buffer.
+  // If the original data is on another device, the data
+  // will be copied to the cpu and stored in `temporary_cpu_buffer`.
+  const void* CpuData() const;
+
   // Set a user memory buffer for the Tensor; the memory is managed by
   // the user itself, but the Tensor will share the memory with the user,
   // so take care with the user buffer.
   void SetExternalData(const std::vector<int64_t>& new_shape,
-                       const FDDataType& data_type, void* data_buffer);
+                       const FDDataType& data_type, void* data_buffer,
+                       const Device& new_device = Device::CPU);

   // Initialize the Tensor,
   // including setting attributes for the tensor
   // and allocating the cpu memory buffer
   void Allocate(const std::vector<int64_t>& new_shape,
                 const FDDataType& data_type,
-                const std::string& tensor_name = "");
+                const std::string& tensor_name = "",
+                const Device& new_device = Device::CPU);

   // Total size of the tensor memory buffer in bytes
   int Nbytes() const;
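Note: the comments above give Data() and the new CpuData() different contracts: Data() returns the buffer wherever it lives (possibly GPU memory), while CpuData() always yields a host pointer, copying into `temporary_cpu_buffer` when needed. Below is a minimal sketch of both accessors plus the device-aware SetExternalData(); it assumes FDDataType::FP32 exists in fd_type.h (only INT8 appears in this diff), and the 2x3 host buffer is purely illustrative:

#include <vector>
#include "fastdeploy/core/fd_tensor.h"

void ExternalDataSketch() {
  // Zero-copy: the tensor shares the caller's buffer instead of owning it,
  // so the buffer must outlive the tensor.
  std::vector<float> host(2 * 3, 1.0f);
  fastdeploy::FDTensor t;
  t.SetExternalData({2, 3}, fastdeploy::FDDataType::FP32,
                    static_cast<void*>(host.data()),
                    fastdeploy::Device::CPU);  // new_device defaults to CPU

  // Data() returns the raw pointer on whatever device holds the data;
  // CpuData() guarantees a host pointer, copying from the GPU if needed.
  const float* view = static_cast<const float*>(t.CpuData());
  (void)view;
}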
@@ -75,13 +81,51 @@ struct FASTDEPLOY_DECL FDTensor {
   // Total number of elements in this tensor
   int Numel() const;

+  void Resize(size_t nbytes);
+
   void Resize(const std::vector<int64_t>& new_shape);

+  void Resize(const std::vector<int64_t>& new_shape,
+              const FDDataType& data_type, const std::string& tensor_name = "",
+              const Device& new_device = Device::CPU);
+
   // Debug function
   // Use this function to print shape, dtype, mean, max, min;
   // the prefix will also be printed as a tag
   void PrintInfo(const std::string& prefix = "TensorInfo: ");

+  bool AllocFn(size_t nbytes) {
+    if (device == Device::GPU) {
+#ifdef WITH_GPU
+      return FDDeviceAllocator()(&buffer_, nbytes);
+#else
+      FDASSERT(false,
+               "The FastDeploy FDTensor allocator was not compiled with "
+               "-DWITH_GPU=ON, so GPU memory cannot be allocated.");
+#endif
+    }
+    return FDHostAllocator()(&buffer_, nbytes);
+  }
+
+  void FreeFn() {
+    if (external_data_ptr != nullptr) external_data_ptr = nullptr;
+    if (buffer_ != nullptr) {
+      if (device == Device::GPU) {
+#ifdef WITH_GPU
+        FDDeviceFree()(buffer_);
+#endif
+      } else {
+        FDHostFree()(buffer_);
+      }
+      buffer_ = nullptr;
+    }
+  }
+
   FDTensor() {}
+  explicit FDTensor(const std::string& tensor_name);
+
+  ~FDTensor() { FreeFn(); }
 };

 }  // namespace fastdeploy
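With AllocFn() dispatching on `device` and the new destructor calling FreeFn(), a tensor can now own a GPU buffer end to end. A short usage sketch under stated assumptions: FDDataType::FP32 and the 1x3x224x224 shape are illustrative choices, and Allocate() is assumed to record `new_device` before allocating, as its signature suggests:

#include "fastdeploy/core/fd_tensor.h"

void GpuAllocateSketch() {
#ifdef WITH_GPU
  fastdeploy::FDTensor t;
  // Allocate() with Device::GPU routes the buffer through FDDeviceAllocator.
  t.Allocate({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
             "input", fastdeploy::Device::GPU);
  t.PrintInfo("gpu tensor: ");  // prints shape, dtype, mean, max, min
#endif
  // In a CPU-only build, requesting Device::GPU trips the FDASSERT in
  // AllocFn(); the GPU buffer is released by ~FDTensor() via FreeFn().
}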