Fix code that directly visits FDTensor members (#193)

* optimize tensorrt usage

* format code

* fix input shape error for onnx model

* Remove some code that directly visits FDTensor members (#192)

remove some code that directly visits FDTensor members

* fix code that directly visits FDTensor members

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
This commit is contained in:
Jason
2022-09-06 11:12:02 +08:00
committed by GitHub
parent 969531dcc8
commit e09ac18a7d
8 changed files with 36 additions and 48 deletions

View File

@@ -164,32 +164,27 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
return true;
}
void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor, const std::string& name) {
const auto info = value.GetTensorTypeAndShapeInfo();
const auto data_type = info.GetElementType();
size_t numel = info.GetElementCount();
tensor->shape = info.GetShape();
if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
tensor->data.resize(numel * sizeof(float));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::FP32, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(float));
tensor->dtype = FDDataType::FP32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
tensor->data.resize(numel * sizeof(int32_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::INT32, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(int32_t));
tensor->dtype = FDDataType::INT32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
tensor->data.resize(numel * sizeof(int64_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::INT64, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(int64_t));
tensor->dtype = FDDataType::INT64;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
tensor->data.resize(numel * sizeof(double));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
tensor->Allocate(info.GetShape(), FDDataType::FP64, name);
memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
numel * sizeof(double));
tensor->dtype = FDDataType::FP64;
} else {
FDASSERT(
false,
@@ -231,8 +226,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
outputs->resize(ort_outputs.size());
for (size_t i = 0; i < ort_outputs.size(); ++i) {
(*outputs)[i].name = outputs_desc_[i].name;
CopyToCpu(ort_outputs[i], &((*outputs)[i]));
CopyToCpu(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name);
}
return true;

View File

@@ -88,6 +88,6 @@ class OrtBackend : public BaseBackend {
Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
#endif
OrtBackendOption option_;
void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
void CopyToCpu(const Ort::Value& value, FDTensor* tensor, const std::string& name);
};
} // namespace fastdeploy

View File

@@ -365,12 +365,8 @@ void TrtBackend::AllocateBufferInDynamicShape(
"Cannot find output: %s of tensorrt network from the original model.",
outputs_desc_[i].name.c_str());
auto ori_idx = iter->second;
(*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
(*outputs)[ori_idx].shape.assign(output_dims.d,
output_dims.d + output_dims.nbDims);
(*outputs)[ori_idx].name = outputs_desc_[i].name;
(*outputs)[ori_idx].data.resize(Volume(output_dims) *
TrtDataTypeSize(outputs_desc_[i].dtype));
std::vector<int64_t> shape(output_dims.d, output_dims.d + output_dims.nbDims);
(*outputs)[ori_idx].Allocate(shape, GetFDDataType(outputs_desc_[i].dtype), outputs_desc_[i].name);
if ((*outputs)[ori_idx].Nbytes() >
outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
outputs_buffer_[outputs_desc_[i].name].resize(output_dims);

View File

@@ -79,7 +79,7 @@ void BindRuntime(pybind11::module& m) {
// TODO(jiangjiajun) Maybe skip memory copy is a better choice
// use SetExternalData
inputs[index].data.resize(iter->second.nbytes());
memcpy(inputs[index].data.data(), iter->second.mutable_data(),
memcpy(inputs[index].MutableData(), iter->second.mutable_data(),
iter->second.nbytes());
inputs[index].name = iter->first;
index += 1;
@@ -94,7 +94,7 @@ void BindRuntime(pybind11::module& m) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(
pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
memcpy(results[i].mutable_data(), outputs[i].Data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;

View File

@@ -66,7 +66,7 @@ void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
tensor->external_data_ptr = pyarray.mutable_data();
} else {
tensor->data.resize(pyarray.nbytes());
memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
memcpy(tensor->MutableData(), pyarray.mutable_data(), pyarray.nbytes());
}
}

View File

@@ -42,8 +42,7 @@ pybind11::array TensorToPyArray(const FDTensor& tensor);
cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
#endif
template <typename T>
FDDataType CTypeToFDDataType() {
template <typename T> FDDataType CTypeToFDDataType() {
if (std::is_same<T, int32_t>::value) {
return FDDataType::INT32;
} else if (std::is_same<T, int64_t>::value) {
@@ -59,8 +58,8 @@ FDDataType CTypeToFDDataType() {
}
template <typename T>
std::vector<pybind11::array> PyBackendInfer(
T& self, const std::vector<std::string>& names,
std::vector<pybind11::array>
PyBackendInfer(T& self, const std::vector<std::string>& names,
std::vector<pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
for (size_t i = 0; i < data.size(); ++i) {
@@ -69,7 +68,7 @@ std::vector<pybind11::array> PyBackendInfer(
inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
data[i].shape() + data[i].ndim());
inputs[i].data.resize(data[i].nbytes());
memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
memcpy(inputs[i].MutableData(), data[i].mutable_data(), data[i].nbytes());
inputs[i].name = names[i];
}
@@ -81,7 +80,7 @@ std::vector<pybind11::array> PyBackendInfer(
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
memcpy(results[i].mutable_data(), outputs[i].Data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;

View File

@@ -115,7 +115,7 @@ bool PaddleClasModel::Postprocess(const FDTensor& infer_result,
ClassifyResult* result, int topk) {
int num_classes = infer_result.shape[1];
const float* infer_result_buffer =
reinterpret_cast<const float*>(infer_result.data.data());
reinterpret_cast<const float*>(infer_result.Data());
topk = std::min(num_classes, topk);
result->label_ids =
utils::TopKIndices(infer_result_buffer, num_classes, topk);

View File

@@ -14,11 +14,11 @@
#pragma once
#include <set>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
#include "fastdeploy/vision/common/result.h"
#include <set>
#include <vector>
namespace fastdeploy {
namespace vision {
@@ -87,8 +87,7 @@ void ArgmaxScoreMap(T infer_result_buffer, SegmentationResult* result,
}
}
template <typename T>
void NCHW2NHWC(FDTensor& infer_result) {
template <typename T> void NCHW2NHWC(FDTensor& infer_result) {
T* infer_result_buffer = reinterpret_cast<T*>(infer_result.MutableData());
int num = infer_result.shape[0];
int channel = infer_result.shape[1];
@@ -125,8 +124,8 @@ void SortDetectionResult(DetectionResult* output);
void SortDetectionResult(FaceDetectionResult* result);
// L2 Norm / cosine similarity (for face recognition, ...)
FASTDEPLOY_DECL std::vector<float> L2Normalize(
const std::vector<float>& values);
FASTDEPLOY_DECL std::vector<float>
L2Normalize(const std::vector<float>& values);
FASTDEPLOY_DECL float CosineSimilarity(const std::vector<float>& a,
const std::vector<float>& b,