Fix code that directly visits FDTensor members (#193)

* optimize tensorrt usage

* format code

* fix input shape error for onnx model

* Remove some code that directly visits FDTensor members (#192)

remove some code that directly visits FDTensor members

* fix code that directly visits FDTensor members

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
This commit is contained in:
Jason
2022-09-06 11:12:02 +08:00
committed by GitHub
parent 969531dcc8
commit e09ac18a7d
8 changed files with 36 additions and 48 deletions

View File

@@ -164,32 +164,27 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
return true;
}
void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor, const std::string& name) {
const auto info = value.GetTensorTypeAndShapeInfo();
const auto data_type = info.GetElementType();
size_t numel = info.GetElementCount();
tensor->shape = info.GetShape();
if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
tensor->data.resize(numel * sizeof(float));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::FP32, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(float));
tensor->dtype = FDDataType::FP32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
tensor->data.resize(numel * sizeof(int32_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::INT32, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(int32_t));
tensor->dtype = FDDataType::INT32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
tensor->data.resize(numel * sizeof(int64_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::INT64, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(int64_t));
tensor->dtype = FDDataType::INT64;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
tensor->data.resize(numel * sizeof(double));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::FP64, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(double));
tensor->dtype = FDDataType::FP64;
} else {
FDASSERT(
false,
@@ -231,8 +226,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
outputs->resize(ort_outputs.size());
for (size_t i = 0; i < ort_outputs.size(); ++i) {
(*outputs)[i].name = outputs_desc_[i].name;
CopyToCpu(ort_outputs[i], &((*outputs)[i]));
CopyToCpu(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name);
}
return true;

View File

@@ -88,6 +88,6 @@ class OrtBackend : public BaseBackend {
Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
#endif
OrtBackendOption option_;
void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
void CopyToCpu(const Ort::Value& value, FDTensor* tensor, const std::string& name);
};
} // namespace fastdeploy

View File

@@ -365,12 +365,8 @@ void TrtBackend::AllocateBufferInDynamicShape(
"Cannot find output: %s of tensorrt network from the original model.",
outputs_desc_[i].name.c_str());
auto ori_idx = iter->second;
(*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
(*outputs)[ori_idx].shape.assign(output_dims.d,
output_dims.d + output_dims.nbDims);
(*outputs)[ori_idx].name = outputs_desc_[i].name;
(*outputs)[ori_idx].data.resize(Volume(output_dims) *
TrtDataTypeSize(outputs_desc_[i].dtype));
std::vector<int64_t> shape(output_dims.d, output_dims.d + output_dims.nbDims);
(*outputs)[ori_idx].Allocate(shape, GetFDDataType(outputs_desc_[i].dtype), outputs_desc_[i].name);
if ((*outputs)[ori_idx].Nbytes() >
outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
outputs_buffer_[outputs_desc_[i].name].resize(output_dims);

View File

@@ -79,7 +79,7 @@ void BindRuntime(pybind11::module& m) {
// TODO(jiangjiajun) Maybe skip memory copy is a better choice
// use SetExternalData
inputs[index].data.resize(iter->second.nbytes());
memcpy(inputs[index].data.data(), iter->second.mutable_data(),
memcpy(inputs[index].MutableData(), iter->second.mutable_data(),
iter->second.nbytes());
inputs[index].name = iter->first;
index += 1;
@@ -94,7 +94,7 @@ void BindRuntime(pybind11::module& m) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(
pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
memcpy(results[i].mutable_data(), outputs[i].Data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;

View File

@@ -66,7 +66,7 @@ void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
tensor->external_data_ptr = pyarray.mutable_data();
} else {
tensor->data.resize(pyarray.nbytes());
memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
memcpy(tensor->MutableData(), pyarray.mutable_data(), pyarray.nbytes());
}
}

View File

@@ -42,8 +42,7 @@ pybind11::array TensorToPyArray(const FDTensor& tensor);
cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
#endif
template <typename T>
FDDataType CTypeToFDDataType() {
template <typename T> FDDataType CTypeToFDDataType() {
if (std::is_same<T, int32_t>::value) {
return FDDataType::INT32;
} else if (std::is_same<T, int64_t>::value) {
@@ -59,8 +58,8 @@ FDDataType CTypeToFDDataType() {
}
template <typename T>
std::vector<pybind11::array> PyBackendInfer(
T& self, const std::vector<std::string>& names,
std::vector<pybind11::array>
PyBackendInfer(T& self, const std::vector<std::string>& names,
std::vector<pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
for (size_t i = 0; i < data.size(); ++i) {
@@ -69,7 +68,7 @@ std::vector<pybind11::array> PyBackendInfer(
inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
data[i].shape() + data[i].ndim());
inputs[i].data.resize(data[i].nbytes());
memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
memcpy(inputs[i].MutableData(), data[i].mutable_data(), data[i].nbytes());
inputs[i].name = names[i];
}
@@ -81,7 +80,7 @@ std::vector<pybind11::array> PyBackendInfer(
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
memcpy(results[i].mutable_data(), outputs[i].Data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;

View File

@@ -115,7 +115,7 @@ bool PaddleClasModel::Postprocess(const FDTensor& infer_result,
ClassifyResult* result, int topk) {
int num_classes = infer_result.shape[1];
const float* infer_result_buffer =
reinterpret_cast<const float*>(infer_result.data.data());
reinterpret_cast<const float*>(infer_result.Data());
topk = std::min(num_classes, topk);
result->label_ids =
utils::TopKIndices(infer_result_buffer, num_classes, topk);

View File

@@ -14,11 +14,11 @@
#pragma once
#include <set>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
#include "fastdeploy/vision/common/result.h"
#include <set>
#include <vector>
namespace fastdeploy {
namespace vision {
@@ -87,8 +87,7 @@ void ArgmaxScoreMap(T infer_result_buffer, SegmentationResult* result,
}
}
template <typename T>
void NCHW2NHWC(FDTensor& infer_result) {
template <typename T> void NCHW2NHWC(FDTensor& infer_result) {
T* infer_result_buffer = reinterpret_cast<T*>(infer_result.MutableData());
int num = infer_result.shape[0];
int channel = infer_result.shape[1];
@@ -125,8 +124,8 @@ void SortDetectionResult(DetectionResult* output);
void SortDetectionResult(FaceDetectionResult* result);
// L2 Norm / cosine similarity (for face recognition, ...)
FASTDEPLOY_DECL std::vector<float> L2Normalize(
const std::vector<float>& values);
FASTDEPLOY_DECL std::vector<float>
L2Normalize(const std::vector<float>& values);
FASTDEPLOY_DECL float CosineSimilarity(const std::vector<float>& a,
const std::vector<float>& b,