Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 17:17:14 +08:00)
Fix directly visit member of FDTensor (#193)
* optimize tensorrt usage
* format code
* fix input shape error for onnx model
* Remove some code directly visit FDTensor member (#192)
  remove some code directly visit FDTensor member
* fix directly visit member of FDTensor

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
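The change replaces direct writes to FDTensor members (shape, dtype, name, data) with the tensor's own API: Allocate() sizes the buffer and sets the metadata in one step, MutableData() gives writable access, Data() read-only access. Below is a minimal sketch of that pattern; Allocate/MutableData/Data/Nbytes are taken from the diff itself, while the includes and the FillTensor helper are illustrative assumptions, not FastDeploy code.

#include <cstring>
#include <string>
#include <vector>

#include "fastdeploy/core/fd_tensor.h"

namespace fastdeploy {

// Hypothetical helper, for illustration only: fill an FDTensor from a raw
// float buffer using the accessor API this commit switches to.
void FillTensor(const float* src, const std::vector<int64_t>& shape,
                const std::string& name, FDTensor* tensor) {
  // Old style removed by this commit: assign tensor->shape / tensor->dtype,
  // resize tensor->data, then memcpy into tensor->Data().
  // New style: Allocate() sets shape, dtype and name and sizes the buffer;
  // writes go through MutableData().
  tensor->Allocate(shape, FDDataType::FP32, name);
  std::memcpy(tensor->MutableData(), src, tensor->Nbytes());
}

}  // namespace fastdeploy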
@@ -164,32 +164,27 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
   return true;
 }
 
-void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
+void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor, const std::string& name) {
   const auto info = value.GetTensorTypeAndShapeInfo();
   const auto data_type = info.GetElementType();
   size_t numel = info.GetElementCount();
-  tensor->shape = info.GetShape();
 
   if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
-    tensor->data.resize(numel * sizeof(float));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
+    tensor->Allocate(info.GetShape(), FDDataType::FP32, name);
+    memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
            numel * sizeof(float));
-    tensor->dtype = FDDataType::FP32;
   } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
-    tensor->data.resize(numel * sizeof(int32_t));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
+    tensor->Allocate(info.GetShape(), FDDataType::INT32, name);
+    memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
            numel * sizeof(int32_t));
-    tensor->dtype = FDDataType::INT32;
   } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
-    tensor->data.resize(numel * sizeof(int64_t));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
+    tensor->Allocate(info.GetShape(), FDDataType::INT64, name);
+    memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
            numel * sizeof(int64_t));
-    tensor->dtype = FDDataType::INT64;
   } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
-    tensor->data.resize(numel * sizeof(double));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
+    tensor->Allocate(info.GetShape(), FDDataType::FP64, name);
+    memcpy(static_cast<void*>(tensor->MutableData()), value.GetTensorData<void*>(),
            numel * sizeof(double));
-    tensor->dtype = FDDataType::FP64;
   } else {
     FDASSERT(
         false,
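For readability: the three member writes that each branch above used to perform (the shape assignment, data.resize and dtype assignment) are presumably what a single Allocate() call now bundles. The sketch below is only a guess at that behaviour, not the actual FastDeploy implementation; Nbytes(), Numel() and FDDataTypeSize appear elsewhere in this commit.

// Hypothetical sketch of what FDTensor::Allocate is assumed to do here;
// the real implementation lives in FastDeploy's core sources.
void FDTensor::Allocate(const std::vector<int64_t>& new_shape,
                        const FDDataType& new_dtype,
                        const std::string& new_name) {
  shape = new_shape;      // was: tensor->shape = info.GetShape();
  dtype = new_dtype;      // was: tensor->dtype = FDDataType::FP32; etc.
  name = new_name;        // was: (*outputs)[i].name = outputs_desc_[i].name;
  data.resize(Nbytes());  // was: tensor->data.resize(numel * sizeof(T));
                          // assuming Nbytes() == Numel() * FDDataTypeSize(dtype)
}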
@@ -231,8 +226,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
   std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
   outputs->resize(ort_outputs.size());
   for (size_t i = 0; i < ort_outputs.size(); ++i) {
-    (*outputs)[i].name = outputs_desc_[i].name;
-    CopyToCpu(ort_outputs[i], &((*outputs)[i]));
+    CopyToCpu(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name);
   }
 
   return true;
@@ -88,6 +88,6 @@ class OrtBackend : public BaseBackend {
   Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
 #endif
   OrtBackendOption option_;
-  void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
+  void CopyToCpu(const Ort::Value& value, FDTensor* tensor, const std::string& name);
 };
 }  // namespace fastdeploy
@@ -365,12 +365,8 @@ void TrtBackend::AllocateBufferInDynamicShape(
         "Cannot find output: %s of tensorrt network from the original model.",
         outputs_desc_[i].name.c_str());
     auto ori_idx = iter->second;
-    (*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
-    (*outputs)[ori_idx].shape.assign(output_dims.d,
-                                     output_dims.d + output_dims.nbDims);
-    (*outputs)[ori_idx].name = outputs_desc_[i].name;
-    (*outputs)[ori_idx].data.resize(Volume(output_dims) *
-                                    TrtDataTypeSize(outputs_desc_[i].dtype));
+    std::vector<int64_t> shape(output_dims.d, output_dims.d + output_dims.nbDims);
+    (*outputs)[ori_idx].Allocate(shape, GetFDDataType(outputs_desc_[i].dtype), outputs_desc_[i].name);
     if ((*outputs)[ori_idx].Nbytes() >
         outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
       outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
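The TensorRT hunk above applies the same idea: build an int64_t shape vector from the binding's nvinfer1::Dims and hand shape, dtype and name to Allocate() in one call. A standalone usage sketch of that conversion follows; it assumes only the public TensorRT Dims fields (d, nbDims) and the FDTensor API shown in this diff, and the free function itself is illustrative rather than FastDeploy code.

#include <string>
#include <vector>

#include "NvInfer.h"  // nvinfer1::Dims

#include "fastdeploy/core/fd_tensor.h"

namespace fastdeploy {

// Illustrative only: allocate a host-side FDTensor for one TensorRT output.
void AllocateTrtOutput(const nvinfer1::Dims& output_dims, FDDataType fd_dtype,
                       const std::string& name, FDTensor* output) {
  // Dims stores nbDims extents in the d[] array; widen them to the int64_t
  // shape vector that FDTensor::Allocate expects.
  std::vector<int64_t> shape(output_dims.d, output_dims.d + output_dims.nbDims);
  output->Allocate(shape, fd_dtype, name);
}

}  // namespace fastdeploy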
@@ -79,7 +79,7 @@ void BindRuntime(pybind11::module& m) {
           // TODO(jiangjiajun) Maybe skip memory copy is a better choice
           // use SetExternalData
           inputs[index].data.resize(iter->second.nbytes());
-          memcpy(inputs[index].data.data(), iter->second.mutable_data(),
+          memcpy(inputs[index].MutableData(), iter->second.mutable_data(),
                  iter->second.nbytes());
           inputs[index].name = iter->first;
           index += 1;
@@ -94,7 +94,7 @@ void BindRuntime(pybind11::module& m) {
           auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
           results.emplace_back(
               pybind11::array(numpy_dtype, outputs[i].shape));
-          memcpy(results[i].mutable_data(), outputs[i].data.data(),
+          memcpy(results[i].mutable_data(), outputs[i].Data(),
                  outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
         }
         return results;
@@ -66,7 +66,7 @@ void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
     tensor->external_data_ptr = pyarray.mutable_data();
   } else {
     tensor->data.resize(pyarray.nbytes());
-    memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
+    memcpy(tensor->MutableData(), pyarray.mutable_data(), pyarray.nbytes());
   }
 }
 
@@ -42,8 +42,7 @@ pybind11::array TensorToPyArray(const FDTensor& tensor);
 cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
 #endif
 
-template <typename T>
-FDDataType CTypeToFDDataType() {
+template <typename T> FDDataType CTypeToFDDataType() {
   if (std::is_same<T, int32_t>::value) {
     return FDDataType::INT32;
   } else if (std::is_same<T, int64_t>::value) {
@@ -59,8 +58,8 @@ FDDataType CTypeToFDDataType() {
 }
 
 template <typename T>
-std::vector<pybind11::array> PyBackendInfer(
-    T& self, const std::vector<std::string>& names,
+std::vector<pybind11::array>
+PyBackendInfer(T& self, const std::vector<std::string>& names,
                std::vector<pybind11::array>& data) {
   std::vector<FDTensor> inputs(data.size());
   for (size_t i = 0; i < data.size(); ++i) {
@@ -69,7 +68,7 @@ std::vector<pybind11::array> PyBackendInfer(
     inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
                            data[i].shape() + data[i].ndim());
     inputs[i].data.resize(data[i].nbytes());
-    memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
+    memcpy(inputs[i].MutableData(), data[i].mutable_data(), data[i].nbytes());
     inputs[i].name = names[i];
   }
 
@@ -81,7 +80,7 @@ std::vector<pybind11::array> PyBackendInfer(
   for (size_t i = 0; i < outputs.size(); ++i) {
     auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
     results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
-    memcpy(results[i].mutable_data(), outputs[i].data.data(),
+    memcpy(results[i].mutable_data(), outputs[i].Data(),
            outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
   }
   return results;
@@ -115,7 +115,7 @@ bool PaddleClasModel::Postprocess(const FDTensor& infer_result,
                                   ClassifyResult* result, int topk) {
   int num_classes = infer_result.shape[1];
   const float* infer_result_buffer =
-      reinterpret_cast<const float*>(infer_result.data.data());
+      reinterpret_cast<const float*>(infer_result.Data());
   topk = std::min(num_classes, topk);
   result->label_ids =
       utils::TopKIndices(infer_result_buffer, num_classes, topk);
@@ -14,11 +14,11 @@
 
 #pragma once
 
-#include <set>
-#include <vector>
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/utils/utils.h"
 #include "fastdeploy/vision/common/result.h"
+#include <set>
+#include <vector>
 
 namespace fastdeploy {
 namespace vision {
@@ -87,8 +87,7 @@ void ArgmaxScoreMap(T infer_result_buffer, SegmentationResult* result,
   }
 }
 
-template <typename T>
-void NCHW2NHWC(FDTensor& infer_result) {
+template <typename T> void NCHW2NHWC(FDTensor& infer_result) {
   T* infer_result_buffer = reinterpret_cast<T*>(infer_result.MutableData());
   int num = infer_result.shape[0];
   int channel = infer_result.shape[1];
@@ -125,8 +124,8 @@ void SortDetectionResult(DetectionResult* output);
 void SortDetectionResult(FaceDetectionResult* result);
 
 // L2 Norm / cosine similarity (for face recognition, ...)
-FASTDEPLOY_DECL std::vector<float> L2Normalize(
-    const std::vector<float>& values);
+FASTDEPLOY_DECL std::vector<float>
+L2Normalize(const std::vector<float>& values);
 
 FASTDEPLOY_DECL float CosineSimilarity(const std::vector<float>& a,
                                        const std::vector<float>& b,