[Other] Optimize code style (#1032)

* Optimize code

* optimize code

* optimize code

* fix compile error
Jason
2023-01-03 19:54:12 +08:00
committed by GitHub
parent ab49b41080
commit f51697d745
31 changed files with 594 additions and 580 deletions
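The hunks below are mechanical formatting changes: long parameter lists are re-wrapped to fit an 80-column limit, includes are sorted, and trivial bodies are collapsed onto one line, consistent with what clang-format produces (the tool itself is not named in the commit, so this is an assumption). A representative before/after, using a signature that recurs throughout the diff:

// Before
bool Infer(std::vector<FDTensor>& inputs,
           std::vector<FDTensor>* outputs,
           bool copy_to_fd = true) override;

// After: parameters packed up to the 80-column limit
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
           bool copy_to_fd = true) override;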

View File

@@ -19,7 +19,6 @@
 #include <string>
 #include <vector>
-#include "fastdeploy/backends/common/multiclass_nms.h"
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/core/fd_type.h"

View File

@@ -2,7 +2,9 @@
 namespace fastdeploy {
-__global__ void CudaCastKernel(const float* in, float* out, int edge, int out_bc_offset, int in_bc_offset, int ih, int iw, int oh, int ow, bool is_avg) {
+__global__ void CudaCastKernel(const float* in, float* out, int edge,
+                               int out_bc_offset, int in_bc_offset, int ih,
+                               int iw, int oh, int ow, bool is_avg) {
   int position = blockDim.x * blockIdx.x + threadIdx.x;
   if (position >= edge) {
     return;
@@ -25,14 +27,18 @@ __global__ void CudaCastKernel(const float* in, float* out, int edge, int out_b
       if (is_avg) {
         out[position] = out[position] + in[offset * in_bc_offset + input_idx];
       } else {
-        out[position] = max(out[position], in[offset * in_bc_offset + input_idx]);
+        out[position] =
+            max(out[position], in[offset * in_bc_offset + input_idx]);
       }
     }
   }
   out[position] = out[position] / ((hend - hstart) * (wend - wstart));
 }
-void CudaAdaptivePool(const std::vector<int64_t>& input_dims, const std::vector<int64_t>& output_dims, float* output, const float* input, void* compute_stream, const std::string& pooling_type){
+void CudaAdaptivePool(const std::vector<int64_t>& input_dims,
+                      const std::vector<int64_t>& output_dims, float* output,
+                      const float* input, void* compute_stream,
+                      const std::string& pooling_type) {
   auto casted_compute_stream = reinterpret_cast<cudaStream_t>(compute_stream);
   int out_bc_offset = output_dims[2] * output_dims[3];
   int in_bc_offset = input_dims[2] * input_dims[3];
@@ -44,8 +50,7 @@ void CudaAdaptivePool(const std::vector<int64_t>& input_dims, const std::vector<
   int threads = 256;
   int blocks = ceil(jobs / static_cast<float>(threads));
   CudaCastKernel<<<blocks, threads, 0, casted_compute_stream>>>(
-      input,
-      output,
-      jobs, out_bc_offset, in_bc_offset, int(input_dims[2]), int(input_dims[3]), int(output_dims[2]), int(output_dims[3]), is_avg);
+      input, output, jobs, out_bc_offset, in_bc_offset, int(input_dims[2]),
+      int(input_dims[3]), int(output_dims[2]), int(output_dims[3]), is_avg);
 }
 }  // namespace fastdeploy

View File

@@ -15,21 +15,18 @@
 #pragma once
+#include <cstdint>
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include <cstdint>
 #include <iostream>
-#include <vector>
 #include <math.h>
+#include <vector>
 namespace fastdeploy {
 void CudaAdaptivePool(const std::vector<int64_t>& input_dims,
-                      const std::vector<int64_t>& output_dims,
-                      float* output,
-                      const float* input,
-                      void* compute_stream,
+                      const std::vector<int64_t>& output_dims, float* output,
+                      const float* input, void* compute_stream,
                       const std::string& pooling_type);
 }  // namespace fastdeploy
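For orientation, a minimal host-side sketch of how this declaration might be invoked; the shapes, device pointers, and pooling type below are assumed for illustration and are not part of the commit:

#include <cstdint>
#include <cuda_runtime.h>
#include <string>
#include <vector>
#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"

// Adaptive average pooling of an assumed 1x64x32x32 float tensor down to 7x7.
void RunAdaptiveAvgPool(const float* d_input, float* d_output,
                        cudaStream_t stream) {
  std::vector<int64_t> input_dims = {1, 64, 32, 32};  // assumed NCHW shape
  std::vector<int64_t> output_dims = {1, 64, 7, 7};   // assumed output shape
  fastdeploy::CudaAdaptivePool(input_dims, output_dims, d_output, d_input,
                               stream, "avg");
  cudaStreamSynchronize(stream);  // wait for the kernel launched on this stream
}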

View File

@@ -341,8 +341,7 @@ int OpenVINOBackend::NumInputs() const { return input_infos_.size(); }
 int OpenVINOBackend::NumOutputs() const { return output_infos_.size(); }
 bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
-                            std::vector<FDTensor>* outputs,
-                            bool copy_to_fd) {
+                            std::vector<FDTensor>* outputs, bool copy_to_fd) {
   if (inputs.size() != input_infos_.size()) {
     FDERROR << "[OpenVINOBackend] Size of the inputs(" << inputs.size()
             << ") should keep same with the inputs of this model("
@@ -368,16 +367,14 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
     if (copy_to_fd) {
       (*outputs)[i].Resize(shape,
                            OpenVINODataTypeToFD(out_tensor.get_element_type()),
-                           output_infos_[i].name,
-                           Device::CPU);
+                           output_infos_[i].name, Device::CPU);
       memcpy((*outputs)[i].MutableData(), out_tensor.data(),
              (*outputs)[i].Nbytes());
     } else {
       (*outputs)[i].name = output_infos_[i].name;
-      (*outputs)[i].SetExternalData(shape,
-          OpenVINODataTypeToFD(out_tensor.get_element_type()),
-          out_tensor.data(),
-          Device::CPU);
+      (*outputs)[i].SetExternalData(
+          shape, OpenVINODataTypeToFD(out_tensor.get_element_type()),
+          out_tensor.data(), Device::CPU);
     }
   }
   return true;

View File

@@ -47,8 +47,7 @@ class OpenVINOBackend : public BaseBackend {
   InitFromOnnx(const std::string& model_file,
                const OpenVINOBackendOption& option = OpenVINOBackendOption());
-  bool Infer(std::vector<FDTensor>& inputs,
-             std::vector<FDTensor>* outputs,
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
   int NumInputs() const override;

View File

@@ -25,17 +25,24 @@ struct OrtTensorDimensions : std::vector<int64_t> {
   }
 };
-void AdaptivePool2dKernel::CpuAdaptivePool(const std::vector<int64_t>& input_size, const std::vector<int64_t>& output_size, const float* input_data, float* output_data){
+void AdaptivePool2dKernel::CpuAdaptivePool(
+    const std::vector<int64_t>& input_size,
+    const std::vector<int64_t>& output_size, const float* input_data,
+    float* output_data) {
   int64_t in_bc_offset = input_size[2] * input_size[3];
   int64_t out_bc_offset = output_size[2] * output_size[3];
   for (int64_t b = 0; b < output_size[0]; b++) {
     for (int64_t c = 0; c < output_size[1]; c++) {
       for (int64_t h = 0; h < output_size[2]; h++) {
-        int64_t hstart = std::floor( static_cast<float>(h * input_size[2]) / output_size[2]);
-        int64_t hend = std::ceil(static_cast<float>((h + 1) * input_size[2]) / output_size[2]);
+        int64_t hstart =
+            std::floor(static_cast<float>(h * input_size[2]) / output_size[2]);
+        int64_t hend = std::ceil(static_cast<float>((h + 1) * input_size[2]) /
+                                 output_size[2]);
         for (int64_t w = 0; w < output_size[3]; w++) {
-          int64_t wstart = std::floor(static_cast<float>(w * input_size[3]) / output_size[3]);
-          int64_t wend = std::ceil(static_cast<float>((w + 1) * input_size[3]) / output_size[3]);
+          int64_t wstart = std::floor(static_cast<float>(w * input_size[3]) /
+                                      output_size[3]);
+          int64_t wend = std::ceil(static_cast<float>((w + 1) * input_size[3]) /
+                                   output_size[3]);
           int64_t out_offset = h * output_size[3] + w;
           output_data[out_offset] = 0;
           for (auto i = hstart; i < hend; i++) {
@@ -44,7 +51,8 @@ void AdaptivePool2dKernel::CpuAdaptivePool(const std::vector<int64_t>& input_siz
               output_data[out_offset] += input_data[i * input_size[3] + j];
             }
             if (pooling_type_ == "max") {
-              output_data[out_offset] = std::max(output_data[out_offset], input_data[i * input_size[3] + j]);
+              output_data[out_offset] = std::max(
+                  output_data[out_offset], input_data[i * input_size[3] + j]);
             }
           }
         }
@@ -79,7 +87,8 @@ void AdaptivePool2dKernel::Compute(OrtKernelContext* context) {
   if (!strcmp(this->provider_, "CUDAExecutionProvider")) {
 #ifdef WITH_GPU
     auto compute_stream = ort_.KernelContext_GetGPUComputeStream(context);
-    CudaAdaptivePool(input_size, output_size_, output_data, input_data, compute_stream, pooling_type_);
+    CudaAdaptivePool(input_size, output_size_, output_data, input_data,
+                     compute_stream, pooling_type_);
 #else
     FDWARNING << "FastDeploy didn't compile with WITH_GPU. "
               << "Will force to use CPU to run." << std::endl;
@@ -91,9 +100,13 @@ void AdaptivePool2dKernel::Compute(OrtKernelContext* context) {
 }
 void AdaptivePool2dKernel::GetAttribute(const OrtKernelInfo* info) {
-  pooling_type_ = ort_.KernelInfoGetAttribute<std::string>(info, "pooling_type");
-  output_size_ = ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "output_size");
-  FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 && output_size_[3] > 0, "The output size of adaptive pool must be positive.");
+  pooling_type_ =
+      ort_.KernelInfoGetAttribute<std::string>(info, "pooling_type");
+  output_size_ =
+      ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "output_size");
+  FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 &&
+               output_size_[3] > 0,
+           "The output size of adaptive pool must be positive.");
 }
 }  // namespace fastdeploy
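As background for the loops above, adaptive pooling derives each output cell's input window from the input/output sizes: hstart = floor(h * IH / OH) and hend = ceil((h + 1) * IH / OH), and analogously along the width axis. A tiny standalone sketch of that index math (illustrative only, not part of the diff):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Print which input rows each output row pools over when mapping ih -> oh.
void PrintAdaptiveWindows(int64_t ih, int64_t oh) {
  for (int64_t h = 0; h < oh; h++) {
    int64_t hstart = std::floor(static_cast<float>(h * ih) / oh);
    int64_t hend = std::ceil(static_cast<float>((h + 1) * ih) / oh);
    std::printf("output row %lld <- input rows [%lld, %lld)\n",
                static_cast<long long>(h), static_cast<long long>(hstart),
                static_cast<long long>(hend));
  }
}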

View File

@@ -14,12 +14,12 @@
 #pragma once
-#include <map>
-#include <string>
-#include <algorithm>
-#include <cmath>
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/utils/utils.h"
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <string>
 #ifndef NON_64_PLATFORM
 #include "onnxruntime_cxx_api.h"  // NOLINT
@@ -38,8 +38,7 @@ struct AdaptivePool2dKernel {
   const char* provider_;
  public:
-  AdaptivePool2dKernel(Ort::CustomOpApi ort,
-                       const OrtKernelInfo* info,
+  AdaptivePool2dKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info,
                        const char* provider)
       : ort_(ort) {
     GetAttribute(info);
@@ -52,8 +51,7 @@ struct AdaptivePool2dKernel {
   void CpuAdaptivePool(const std::vector<int64_t>& input_size,
                        const std::vector<int64_t>& output_size,
-                       const float* input_data,
-                       float* output_data);
+                       const float* input_data, float* output_data);
 };
 struct AdaptivePool2dOp
@@ -77,9 +75,8 @@ struct AdaptivePool2dOp
     return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
   }
-  const char* GetExecutionProviderType() const {
-    return provider_;
-  }
+  const char* GetExecutionProviderType() const { return provider_; }
  private:
   const char* provider_;
 };

View File

@@ -15,9 +15,9 @@
 #ifndef NON_64_PLATFORM
 #include "fastdeploy/backends/ort/ops/multiclass_nms.h"
-#include <algorithm>
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/utils/utils.h"
+#include <algorithm>
 namespace fastdeploy {

View File

@@ -16,8 +16,8 @@
 #include <memory>
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
 #include "fastdeploy/backends/ort/ops/adaptive_pool2d.h"
+#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
 #include "fastdeploy/backends/ort/utils.h"
 #include "fastdeploy/core/float16.h"
 #include "fastdeploy/utils/utils.h"
@@ -94,8 +94,8 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, ops.data(),
-                           2, "onnxruntime", nullptr, 0, "", &save_external)) {
+                           verbose, true, true, true, ops.data(), 2,
+                           "onnxruntime", nullptr, 0, "", &save_external)) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
@@ -216,15 +216,13 @@ void OrtBackend::OrtValueToFDTensor(const Ort::Value& value, FDTensor* tensor,
     memcpy(tensor->MutableData(), value_ptr, numel);
   } else {
     tensor->name = name;
-    tensor->SetExternalData(
-        shape, dtype,
-        const_cast<void*>(value_ptr), Device::CPU);
+    tensor->SetExternalData(shape, dtype, const_cast<void*>(value_ptr),
+                            Device::CPU);
   }
 }
 bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
-                       std::vector<FDTensor>* outputs,
-                       bool copy_to_fd) {
+                       std::vector<FDTensor>* outputs, bool copy_to_fd) {
   if (inputs.size() != inputs_desc_.size()) {
     FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
             << ") should keep same with the inputs of this model("
@@ -256,8 +254,8 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
   std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
   outputs->resize(ort_outputs.size());
   for (size_t i = 0; i < ort_outputs.size(); ++i) {
-    OrtValueToFDTensor(ort_outputs[i], &((*outputs)[i]),
-                       outputs_desc_[i].name, copy_to_fd);
+    OrtValueToFDTensor(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name,
+                       copy_to_fd);
   }
   return true;
@@ -311,10 +309,12 @@ void OrtBackend::InitCustomOperators() {
     MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{};
     custom_operators_.push_back(multiclass_nms);
     if (option_.use_gpu) {
-      AdaptivePool2dOp* adaptive_pool2d = new AdaptivePool2dOp{"CUDAExecutionProvider"};
+      AdaptivePool2dOp* adaptive_pool2d =
+          new AdaptivePool2dOp{"CUDAExecutionProvider"};
       custom_operators_.push_back(adaptive_pool2d);
     } else {
-      AdaptivePool2dOp* adaptive_pool2d = new AdaptivePool2dOp{"CPUExecutionProvider"};
+      AdaptivePool2dOp* adaptive_pool2d =
+          new AdaptivePool2dOp{"CPUExecutionProvider"};
       custom_operators_.push_back(adaptive_pool2d);
     }
   }

View File

@@ -18,6 +18,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <map>
 #include "fastdeploy/backends/backend.h"
 #include "onnxruntime_cxx_api.h"  // NOLINT
@@ -67,8 +68,7 @@ class OrtBackend : public BaseBackend {
                       const OrtBackendOption& option = OrtBackendOption(),
                       bool from_memory_buffer = false);
-  bool Infer(std::vector<FDTensor>& inputs,
-             std::vector<FDTensor>* outputs,
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
   int NumInputs() const override { return inputs_desc_.size(); }

View File

@@ -104,7 +104,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
   std::string contents;
   if (option.model_from_memory_) {
-    config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_, params_file.c_str(), option.params_buffer_size_);
+    config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_,
+                           params_file.c_str(), option.params_buffer_size_);
     contents = model_file;
   } else {
     config_.SetModel(model_file, params_file);
@@ -182,7 +183,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
     FDINFO << "Start generating shape range info file." << std::endl;
     paddle_infer::Config analysis_config;
     if (option.model_from_memory_) {
-      analysis_config.SetModelBuffer(model_file.c_str(), option.model_buffer_size_, params_file.c_str(), option.params_buffer_size_);
+      analysis_config.SetModelBuffer(
+          model_file.c_str(), option.model_buffer_size_, params_file.c_str(),
+          option.params_buffer_size_);
     } else {
       analysis_config.SetModel(model_file, params_file);
     }

View File

@@ -62,8 +62,7 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor,
 }
 void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
-                            FDTensor* fd_tensor,
-                            bool copy_to_fd) {
+                            FDTensor* fd_tensor, bool copy_to_fd) {
   auto fd_dtype = PaddleDataTypeToFD(tensor->type());
   std::vector<int64_t> shape;
   auto tmp_shape = tensor->shape();
@@ -99,7 +98,9 @@ void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
   } else if (fd_dtype == FDDataType::UINT8) {
     out_data = tensor->data<uint8_t>(&place, &size);
   } else {
-    FDASSERT(false, "Unexpected data type(%s) while infer shared with PaddleBackend.",
+    FDASSERT(
+        false,
+        "Unexpected data type(%s) while infer shared with PaddleBackend.",
         Str(fd_dtype).c_str());
   }
   Device device = Device::CPU;
@@ -107,9 +108,7 @@ void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
     device = Device::GPU;
   }
   fd_tensor->name = tensor->name();
-  fd_tensor->SetExternalData(
-      shape, fd_dtype,
-      out_data, device);
+  fd_tensor->SetExternalData(shape, fd_dtype, out_data, device);
 }
 }
@@ -153,7 +152,10 @@ FDDataType ReaderDataTypeToFD(int32_t dtype) {
   } else if (dtype == 6) {
     fd_dtype = FDDataType::FP16;
   } else {
-    FDASSERT(false, "Unexpected data type: %d while call ReaderDataTypeToFD in PaddleBackend.", dtype);
+    FDASSERT(false,
+             "Unexpected data type: %d while call ReaderDataTypeToFD in "
+             "PaddleBackend.",
+             dtype);
   }
   return fd_dtype;
 }

View File

@@ -14,14 +14,14 @@
 #pragma once
-#include <string>
 #include <algorithm>
-#include <unordered_map>
 #include <set>
+#include <string>
+#include <unordered_map>
-#include "torch/script.h"
 #include "iengine.h"
 #include "poros_module.h"
+#include "torch/script.h"
 namespace baidu {
 namespace mirana {
@@ -36,7 +36,8 @@ namespace poros {
 * @return porosmodule
 * @retval !nullptr => succeed nullptr => failed
 **/
-std::unique_ptr<PorosModule> Compile(const torch::jit::Module& module,
+std::unique_ptr<PorosModule>
+Compile(const torch::jit::Module& module,
         const std::vector<std::vector<c10::IValue>>& prewarm_datas,
         const PorosOptions& options);
@@ -71,7 +72,6 @@ public:
                         torch::jit::Module* optimized_module);
 private:
     /**
     * @brief preprocess this calculation graph
     *
@@ -80,7 +80,8 @@ private:
    * @return int
    * @retval 0 => succeed <0 => failed
    **/
-    int preprocess_graph(const ivalue_vec_t& prewarm_datas, std::shared_ptr<torch::jit::Graph>& graph);
+    int preprocess_graph(const ivalue_vec_t& prewarm_datas,
+                         std::shared_ptr<torch::jit::Graph>& graph);
    /**
    * @brief segement this calculation graph
@@ -93,7 +94,8 @@ private:
    // Split subgraph(block)
    // The divided subgraph, as a subgraph, is associated with the block
-    int segment_block(torch::jit::Block& block, IEngine* engine, int current_depth);
+    int segment_block(torch::jit::Block& block, IEngine* engine,
+                      int current_depth);
    // Subgraph optimization
    /**
@@ -158,7 +160,8 @@ private:
 * @return optimized_module
 * @retval !nullptr => succeed nullptr => failed
 **/
-std::unique_ptr<torch::jit::Module> CompileGraph(const torch::jit::Module& module,
+std::unique_ptr<torch::jit::Module>
+CompileGraph(const torch::jit::Module& module,
              const std::vector<std::vector<c10::IValue>>& prewarm_datas,
              const PorosOptions& options);

View File

@@ -17,9 +17,9 @@
 #include <string>
 //from pytorch
-#include "torch/script.h"
-#include "torch/csrc/jit/ir/ir.h"
 #include "ATen/core/interned_strings.h"
+#include "torch/csrc/jit/ir/ir.h"
+#include "torch/script.h"
 #include "plugin_create.h"
@@ -58,9 +58,11 @@ public:
    * @param [in] inputs : input tensor
    * @return [res] output tensor
    **/
-    virtual std::vector<at::Tensor> excute_engine(const std::vector<at::Tensor>& inputs) = 0;
+    virtual std::vector<at::Tensor>
+    excute_engine(const std::vector<at::Tensor>& inputs) = 0;
-    virtual void register_module_attribute(const std::string& name, torch::jit::Module& module) = 0;
+    virtual void register_module_attribute(const std::string& name,
+                                           torch::jit::Module& module) = 0;
    // Logo
    virtual const std::string who_am_i() = 0;
@@ -71,7 +73,6 @@ public:
 public:
    std::pair<uint64_t, uint64_t> _num_io;  // Number of input/output parameters
    EngineID _id;
 };
 }  // namespace poros

View File

@@ -14,8 +14,8 @@
 #pragma once
-#include <unordered_map>
 #include <string>
+#include <unordered_map>
 namespace baidu {
 namespace mirana {
@@ -31,35 +31,39 @@ typedef IPlugin* (*plugin_creator_t)();
 typedef std::unordered_map<std::string, plugin_creator_t> plugin_creator_map_t;
 IPlugin* create_plugin(const std::string& plugin_name);
-IPlugin* create_plugin(const std::string& plugin_name, const plugin_creator_map_t& plugin_creator_map);
+IPlugin* create_plugin(const std::string& plugin_name,
+                       const plugin_creator_map_t& plugin_creator_map);
 void create_all_plugins(const plugin_creator_map_t& plugin_creator_map,
                         std::unordered_map<std::string, IPlugin*>& plugin_m);
 //void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
-template <typename PluginType>
-IPlugin* default_plugin_creator() {
+template <typename PluginType> IPlugin* default_plugin_creator() {
     return new (std::nothrow) PluginType;
 }
-void register_plugin_creator(const std::string& plugin_name, plugin_creator_t creator);
-void register_plugin_creator(const std::string& plugin_name,
-                plugin_creator_t creator, plugin_creator_map_t& plugin_creator_map);
+void register_plugin_creator(const std::string& plugin_name,
+                             plugin_creator_t creator);
+void register_plugin_creator(const std::string& plugin_name,
+                             plugin_creator_t creator,
+                             plugin_creator_map_t& plugin_creator_map);
 template <typename PluginType>
 void register_plugin_class(const std::string& plugin_name) {
-    return register_plugin_creator(plugin_name, default_plugin_creator<PluginType>);
+    return register_plugin_creator(plugin_name,
+                                   default_plugin_creator<PluginType>);
 }
 // This version is recommended
 template <typename PluginType>
-void register_plugin_class(const std::string& plugin_name, plugin_creator_map_t& plugin_creator_map) {
-    return register_plugin_creator(plugin_name, default_plugin_creator<PluginType>, plugin_creator_map);
+void register_plugin_class(const std::string& plugin_name,
+                           plugin_creator_map_t& plugin_creator_map) {
+    return register_plugin_creator(
+        plugin_name, default_plugin_creator<PluginType>, plugin_creator_map);
 }
-}//poros
-}//mirana
-}//baidu
+}  // namespace poros
+}  // namespace mirana
+}  // namespace baidu
 /* vim: set ts=4 sw=4 sts=4 tw=100 */

View File

@@ -14,21 +14,16 @@
 #pragma once
-#include <string>
-#include "torch/script.h"
 #include "torch/csrc/jit/jit_log.h"
+#include "torch/script.h"
+#include <string>
 // #include "ATen/Context.h"
 namespace baidu {
 namespace mirana {
 namespace poros {
-enum Device : int8_t {
-    GPU = 0,
-    CPU,
-    XPU,
-    UNKNOW
-};
+enum Device : int8_t { GPU = 0, CPU, XPU, UNKNOW };
 struct PorosOptions {
     Device device = GPU;
@@ -44,23 +39,20 @@ struct PorosOptions {
 class PorosModule : public torch::jit::Module {
 public:
-    PorosModule(torch::jit::Module module) : torch::jit::Module(module) {
-    }
+    PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}
     ~PorosModule() = default;
-    void to_device(Device device){
-        _options.device = device;
-    }
+    void to_device(Device device) { _options.device = device; }
     //c10::IValue forward(std::vector<c10::IValue> inputs);
    //void save(const std::string& filename);
 public:
    PorosOptions _options;
 };
 //via porosmodule.save
-std::unique_ptr<PorosModule> Load(const std::string& filename, const PorosOptions& options);
+std::unique_ptr<PorosModule> Load(const std::string& filename,
+                                  const PorosOptions& options);
 }  // namespace poros
 }  // namespace mirana
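A small usage sketch of the declarations above; the model path and the choice of device are assumed for illustration and are not taken from the repository:

#include <memory>
#include "poros_module.h"

// Load a previously saved Poros module with GPU options and move it to the GPU.
std::unique_ptr<baidu::mirana::poros::PorosModule> LoadPorosModel() {
  baidu::mirana::poros::PorosOptions options;
  options.device = baidu::mirana::poros::Device::GPU;
  auto module = baidu::mirana::poros::Load("poros_model.pt", options);  // assumed path
  if (module != nullptr) {
    module->to_device(baidu::mirana::poros::Device::GPU);
  }
  return module;
}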

View File

@@ -188,8 +188,7 @@ bool PorosBackend::InitFromPoros(const std::string& model_file,
 }
 bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
-                         std::vector<FDTensor>* outputs,
-                         bool copy_to_fd) {
+                         std::vector<FDTensor>* outputs, bool copy_to_fd) {
   // Convert FD Tensor to PyTorch Tensor
   std::vector<torch::jit::IValue> poros_inputs;
   bool is_backend_cuda =

View File

@@ -74,8 +74,8 @@ class PorosBackend : public BaseBackend {
   void BuildOption(const PorosBackendOption& option);
-  bool InitFromTorchScript(
-      const std::string& model_file,
+  bool
+  InitFromTorchScript(const std::string& model_file,
       const PorosBackendOption& option = PorosBackendOption());
   bool InitFromPoros(const std::string& model_file,
@@ -85,8 +85,7 @@ class PorosBackend : public BaseBackend {
                 std::vector<std::vector<FDTensor>>& prewarm_tensors,
                 const PorosBackendOption& option = PorosBackendOption());
-  bool Infer(std::vector<FDTensor>& inputs,
-             std::vector<FDTensor>* outputs,
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
   int NumInputs() const { return _numinputs; }

View File

@@ -129,8 +129,7 @@ at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
                  numel * sizeof(double));
     }
   } else {
-    FDASSERT(false,
-             "Unrecognized data type while calling "
+    FDASSERT(false, "Unrecognized data type while calling "
                     "PorosBackend::CreatePorosValue().");
   }
   return poros_value;

View File

@@ -173,16 +173,15 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
   // create input tensor memory
   // rknn_tensor_mem* input_mems[io_num.n_input];
-  input_mems_ = (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_input);
+  input_mems_ =
+      (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_input);
   // get input info and copy to input tensor info
   for (uint32_t i = 0; i < io_num.n_input; i++) {
     input_attrs_[i].index = i;
     // query info
-    ret = rknn_query(ctx,
-                     RKNN_QUERY_INPUT_ATTR,
-                     &(input_attrs_[i]),
+    ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs_[i]),
                      sizeof(rknn_tensor_attr));
     DumpTensorAttr(input_attrs_[i]);
@@ -192,10 +191,10 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
     }
     if ((input_attrs_[i].fmt != RKNN_TENSOR_NHWC) &&
         (input_attrs_[i].fmt != RKNN_TENSOR_UNDEFINED)) {
-      FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED" << std::endl;
+      FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED"
+              << std::endl;
     }
     // copy input_attrs_ to input tensor info
     std::string temp_name = input_attrs_[i].name;
     std::vector<int> temp_shape{};
@@ -203,25 +202,28 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
     for (int j = 0; j < input_attrs_[i].n_dims; j++) {
       temp_shape[j] = (int)input_attrs_[i].dims[j];
     }
-    FDDataType temp_dtype = fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(input_attrs_[i].type);
+    FDDataType temp_dtype =
+        fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
+            input_attrs_[i].type);
     TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
     inputs_desc_[i] = temp_input_info;
   }
   // Get detailed output parameters
-  output_attrs_ = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
+  output_attrs_ =
+      (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
   memset(output_attrs_, 0, io_num.n_output * sizeof(rknn_tensor_attr));
   outputs_desc_.resize(io_num.n_output);
   // Create output tensor memory
-  output_mems_ = (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_output);;
+  output_mems_ =
+      (rknn_tensor_mem**)malloc(sizeof(rknn_tensor_mem*) * io_num.n_output);
+  ;
   for (uint32_t i = 0; i < io_num.n_output; i++) {
     output_attrs_[i].index = i;
     // query info
-    ret = rknn_query(ctx,
-                     RKNN_QUERY_OUTPUT_ATTR,
-                     &(output_attrs_[i]),
+    ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs_[i]),
                      sizeof(rknn_tensor_attr));
     DumpTensorAttr(output_attrs_[i]);
@@ -292,8 +294,7 @@ std::vector<TensorInfo> RKNPU2Backend::GetOutputInfos() {
 }
 bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
-                          std::vector<FDTensor>* outputs,
-                          bool copy_to_fd) {
+                          std::vector<FDTensor>* outputs, bool copy_to_fd) {
   int ret = RKNN_SUCC;
   // Judge whether the input and output size are the same
   if (inputs.size() != inputs_desc_.size()) {
@@ -307,11 +308,13 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
   for (uint32_t i = 0; i < io_num.n_input; i++) {
     // Judge whether the input and output types are the same
     rknn_tensor_type input_type =
-        fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[i].dtype);
+        fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(
+            inputs[i].dtype);
     if (input_type != input_attrs_[i].type) {
       FDWARNING << "The input tensor type != model's inputs type."
-                << "The input_type need " << get_type_string(input_attrs_[i].type)
-                << ",but inputs["<< i << "].type is " << get_type_string(input_type)
+                << "The input_type need "
+                << get_type_string(input_attrs_[i].type) << ",but inputs["
+                << i << "].type is " << get_type_string(input_type)
                 << std::endl;
     }
@@ -322,7 +325,8 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
     if (input_attrs_[i].type == RKNN_TENSOR_FLOAT16 ||
         input_attrs_[i].type == RKNN_TENSOR_FLOAT32) {
       FDINFO << "The input model is not a quantitative model. "
-                "Close the normalize operation." << std::endl;
+                "Close the normalize operation."
+             << std::endl;
     }
     input_mems_[i] = rknn_create_mem(ctx, inputs[i].Nbytes());

View File

@@ -14,9 +14,9 @@
 #pragma once
 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
 #include "fastdeploy/core/fd_tensor.h"
 #include "rknn_api.h"  // NOLINT
-#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
 #include <cstring>
 #include <iostream>
 #include <memory>
@@ -71,8 +71,7 @@ class RKNPU2Backend : public BaseBackend {
   TensorInfo GetOutputInfo(int index) override;
   std::vector<TensorInfo> GetInputInfos() override;
   std::vector<TensorInfo> GetOutputInfos() override;
-  bool Infer(std::vector<FDTensor>& inputs,
-             std::vector<FDTensor>* outputs,
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
  private:

View File

@@ -17,11 +17,14 @@
 namespace fastdeploy {
 nvinfer1::PluginFieldCollection AdaptivePool2dPluginCreator::mFC{};
-std::vector<nvinfer1::PluginField> AdaptivePool2dPluginCreator::mPluginAttributes;
+std::vector<nvinfer1::PluginField>
+    AdaptivePool2dPluginCreator::mPluginAttributes;
-pluginStatus_t AdaptivePool2dInference(cudaStream_t stream, int32_t n, const void* input, void* output);
+pluginStatus_t AdaptivePool2dInference(cudaStream_t stream, int32_t n,
+                                       const void* input, void* output);
-AdaptivePool2d::AdaptivePool2d(std::vector<int32_t> output_size, std::string pooling_type) {
+AdaptivePool2d::AdaptivePool2d(std::vector<int32_t> output_size,
+                               std::string pooling_type) {
   output_size_ = output_size;
   pooling_type_ = pooling_type;
 }
@@ -40,20 +43,17 @@ AdaptivePool2d::AdaptivePool2d(const void* buffer, size_t length) {
   FDASSERT(d == a + length, "deserialize failed.");
 }
-int AdaptivePool2d::getNbOutputs() const noexcept {
-  return 1;
-}
+int AdaptivePool2d::getNbOutputs() const noexcept { return 1; }
 nvinfer1::DimsExprs AdaptivePool2d::getOutputDimensions(
-    int outputIndex, const nvinfer1::DimsExprs* inputs,
-    int nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept {
+    int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
+    nvinfer1::IExprBuilder& exprBuilder) noexcept {
   try {
     nvinfer1::DimsExprs output(inputs[0]);
     output.d[2] = exprBuilder.constant(static_cast<int32_t>(output_size_[2]));
     output.d[3] = exprBuilder.constant(static_cast<int32_t>(output_size_[3]));
     return output;
-  }
-  catch (const std::exception& e) {
+  } catch (const std::exception& e) {
     FDASSERT(false, "getOutputDimensions failed: %s.", e.what());
   }
   return nvinfer1::DimsExprs{};
@@ -61,22 +61,22 @@ nvinfer1::DimsExprs AdaptivePool2d::getOutputDimensions(
 int AdaptivePool2d::enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
                             const nvinfer1::PluginTensorDesc* outputDesc,
-                            const void* const* inputs,
-                            void* const* outputs,
-                            void* workspace,
-                            cudaStream_t stream) noexcept {
+                            const void* const* inputs, void* const* outputs,
+                            void* workspace, cudaStream_t stream) noexcept {
   if (inputDesc[0].type != nvinfer1::DataType::kFLOAT) {
     return -1;
   }
   auto const* data = static_cast<float const*>(inputs[0]);
   auto* result = static_cast<float*>(outputs[0]);
-  int nums = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] * outputDesc[0].dims.d[2]* outputDesc[0].dims.d[3];
+  int nums = outputDesc[0].dims.d[0] * outputDesc[0].dims.d[1] *
+             outputDesc[0].dims.d[2] * outputDesc[0].dims.d[3];
   std::vector<int64_t> input_size, output_size;
   for (int i = 0; i < 4; i++) {
     input_size.push_back(inputDesc[0].dims.d[i]);
     output_size.push_back(outputDesc[0].dims.d[i]);
   }
-  CudaAdaptivePool(input_size, output_size, result, data, stream, pooling_type_);
+  CudaAdaptivePool(input_size, output_size, result, data, stream,
+                   pooling_type_);
   return cudaPeekAtLastError();
 }
@@ -97,28 +97,26 @@ void AdaptivePool2d::serialize(void* buffer) const noexcept {
   FDASSERT(d == a + getSerializationSize(), "d == a + getSerializationSize()");
 }
-nvinfer1::DataType AdaptivePool2d::getOutputDataType(
-    int index, const nvinfer1::DataType* inputType, int nbInputs) const noexcept {
+nvinfer1::DataType
+AdaptivePool2d::getOutputDataType(int index,
+                                  const nvinfer1::DataType* inputType,
+                                  int nbInputs) const noexcept {
   return inputType[0];
 }
 bool AdaptivePool2d::supportsFormatCombination(
-    int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, int nbOutputs) noexcept {
+    int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs,
+    int nbOutputs) noexcept {
   return (inOut[pos].format == nvinfer1::PluginFormat::kLINEAR);
 }
-int AdaptivePool2d::initialize() noexcept {
-  return 0;
-}
+int AdaptivePool2d::initialize() noexcept { return 0; }
-void AdaptivePool2d::terminate() noexcept {
-  return;
-}
+void AdaptivePool2d::terminate() noexcept { return; }
-size_t AdaptivePool2d::getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
-                                        int nbInputs,
-                                        const nvinfer1::PluginTensorDesc* outputs,
-                                        int nbOutputs) const noexcept {
+size_t AdaptivePool2d::getWorkspaceSize(
+    const nvinfer1::PluginTensorDesc* inputs, int nbInputs,
+    const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const noexcept {
   return 0;
 }
@@ -126,24 +124,21 @@ const char* AdaptivePool2d::getPluginType() const noexcept {
   return "AdaptivePool2d";
 }
-const char* AdaptivePool2d::getPluginVersion() const noexcept {
-  return "1";
-}
+const char* AdaptivePool2d::getPluginVersion() const noexcept { return "1"; }
-void AdaptivePool2d::destroy() noexcept {
-  return;
-}
+void AdaptivePool2d::destroy() noexcept { return; }
-void AdaptivePool2d::configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
+void AdaptivePool2d::configurePlugin(
+    const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
     const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) noexcept {
   return;
 }
 nvinfer1::IPluginV2DynamicExt* AdaptivePool2d::clone() const noexcept {
   try {
-    nvinfer1::IPluginV2DynamicExt* plugin = new AdaptivePool2d(output_size_, pooling_type_);
+    nvinfer1::IPluginV2DynamicExt* plugin =
+        new AdaptivePool2d(output_size_, pooling_type_);
     plugin->setPluginNamespace(mNamespace.c_str());
     return plugin;
-  }
-  catch (std::exception const& e){
+  } catch (std::exception const& e) {
     FDASSERT(false, "clone failed: %s.", e.what());
   }
   return nullptr;
@@ -151,8 +146,10 @@ nvinfer1::IPluginV2DynamicExt* AdaptivePool2d::clone() const noexcept {
 AdaptivePool2dPluginCreator::AdaptivePool2dPluginCreator() {
   mPluginAttributes.clear();
-  mPluginAttributes.emplace_back(nvinfer1::PluginField("output_size", nullptr, nvinfer1::PluginFieldType::kINT32, 4));
-  mPluginAttributes.emplace_back(nvinfer1::PluginField("pooling_type", nullptr, nvinfer1::PluginFieldType::kCHAR, 3));
+  mPluginAttributes.emplace_back(nvinfer1::PluginField(
+      "output_size", nullptr, nvinfer1::PluginFieldType::kINT32, 4));
+  mPluginAttributes.emplace_back(nvinfer1::PluginField(
+      "pooling_type", nullptr, nvinfer1::PluginFieldType::kCHAR, 3));
   mFC.nbFields = mPluginAttributes.size();
   mFC.fields = mPluginAttributes.data();
@@ -166,12 +163,13 @@ const char* AdaptivePool2dPluginCreator::getPluginVersion() const noexcept {
   return "1";
 }
-const nvinfer1::PluginFieldCollection* AdaptivePool2dPluginCreator::getFieldNames() noexcept {
+const nvinfer1::PluginFieldCollection*
+AdaptivePool2dPluginCreator::getFieldNames() noexcept {
   return &mFC;
 }
-nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(const char* name,
-    const nvinfer1::PluginFieldCollection* fc) noexcept {
+nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(
+    const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept {
   try {
     const nvinfer1::PluginField* fields = fc->fields;
     auto const dims = static_cast<int32_t const*>(fields[0].data);
@@ -184,20 +182,17 @@ nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::createPlugin(const c
     std::string pooling_type(pooling_type_ptr, 3);
     pooling_type_ = pooling_type;
     return new AdaptivePool2d(output_size_, pooling_type_);
-  }
-  catch (std::exception const& e){
+  } catch (std::exception const& e) {
     FDASSERT(false, "createPlugin failed: %s.", e.what());
   }
   return nullptr;
 }
-nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::deserializePlugin(const char* name,
-    const void* serialData,
-    size_t serialLength) noexcept {
+nvinfer1::IPluginV2DynamicExt* AdaptivePool2dPluginCreator::deserializePlugin(
+    const char* name, const void* serialData, size_t serialLength) noexcept {
   try {
     return new AdaptivePool2d(serialData, serialLength);
-  }
-  catch (std::exception const& e){
+  } catch (std::exception const& e) {
     FDASSERT(false, "deserializePlugin failed: %s.", e.what());
   }
   return nullptr;

View File

@@ -13,8 +13,8 @@
 // limitations under the License.
 #pragma once
-#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
 #include "common.h"  // NOLINT
+#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
 namespace fastdeploy {
@@ -28,22 +28,18 @@ class AdaptivePool2d : public BasePlugin {
   int getNbOutputs() const noexcept override;
-  nvinfer1::DimsExprs getOutputDimensions(
-      int outputIndex,
-      const nvinfer1::DimsExprs* inputs,
-      int nbInputs,
-      nvinfer1::IExprBuilder& exprBuilder) noexcept override;
+  nvinfer1::DimsExprs
+  getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs* inputs,
+                      int nbInputs,
+                      nvinfer1::IExprBuilder& exprBuilder) noexcept override;
-  nvinfer1::DataType getOutputDataType(
-      int index,
-      const nvinfer1::DataType* inputType,
-      int nbInputs) const noexcept override;
+  nvinfer1::DataType getOutputDataType(int index,
+                                       const nvinfer1::DataType* inputType,
+                                       int nbInputs) const noexcept override;
-  bool supportsFormatCombination(
-      int pos,
-      const nvinfer1::PluginTensorDesc* inOut,
-      int nbInputs,
-      int nbOutputs) noexcept override;
+  bool supportsFormatCombination(int pos,
+                                 const nvinfer1::PluginTensorDesc* inOut,
+                                 int nbInputs, int nbOutputs) noexcept override;
   int initialize() noexcept override;
@@ -56,9 +52,7 @@ class AdaptivePool2d : public BasePlugin {
   int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
               const nvinfer1::PluginTensorDesc* outputDesc,
-              const void* const* inputs,
-              void* const* outputs,
-              void* workspace,
+              const void* const* inputs, void* const* outputs, void* workspace,
               cudaStream_t stream) noexcept override;
   size_t getSerializationSize() const noexcept override;
@@ -93,11 +87,12 @@ class AdaptivePool2dPluginCreator : public BaseCreator {
   const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override;
-  nvinfer1::IPluginV2DynamicExt* createPlugin(const char* name,
+  nvinfer1::IPluginV2DynamicExt*
+  createPlugin(const char* name,
                const nvinfer1::PluginFieldCollection* fc) noexcept override;
-  nvinfer1::IPluginV2DynamicExt* deserializePlugin(const char* name,
-                                                   const void* serialData,
+  nvinfer1::IPluginV2DynamicExt*
+  deserializePlugin(const char* name, const void* serialData,
                     size_t serialLength) noexcept override;
  private:

View File

@@ -17,12 +17,12 @@
#include "NvInferPlugin.h" #include "NvInferPlugin.h"
#include "NvInferRuntimeCommon.h" #include "NvInferRuntimeCommon.h"
#include "fastdeploy/utils/utils.h" #include "fastdeploy/utils/utils.h"
#include <cstring>
#include <iostream> #include <iostream>
#include <memory>
#include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include <memory>
#include <cstring>
#include <sstream>
namespace fastdeploy { namespace fastdeploy {
@@ -62,15 +62,13 @@ typedef enum {
} pluginStatus_t; } pluginStatus_t;
// Write values into buffer // Write values into buffer
template <typename T> template <typename T> void write(char*& buffer, const T& val) {
void write(char*& buffer, const T& val) {
std::memcpy(buffer, &val, sizeof(T)); std::memcpy(buffer, &val, sizeof(T));
buffer += sizeof(T); buffer += sizeof(T);
} }
// Read values from buffer // Read values from buffer
template <typename T> template <typename T> T read(const char*& buffer) {
T read(const char*& buffer) {
T val{}; T val{};
std::memcpy(&val, buffer, sizeof(T)); std::memcpy(&val, buffer, sizeof(T));
buffer += sizeof(T); buffer += sizeof(T);
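A self-contained sketch modeled on the write/read helpers above, serializing an int and a float into a byte buffer and reading them back; the helpers are re-declared here only so the snippet compiles on its own:

#include <cstring>
#include <iostream>

template <typename T> void write(char*& buffer, const T& val) {
  std::memcpy(buffer, &val, sizeof(T));
  buffer += sizeof(T);
}

template <typename T> T read(const char*& buffer) {
  T val{};
  std::memcpy(&val, buffer, sizeof(T));
  buffer += sizeof(T);
  return val;
}

int main() {
  char storage[sizeof(int) + sizeof(float)] = {};
  char* w = storage;
  write(w, 42);    // serialize an int
  write(w, 2.5f);  // then a float
  const char* r = storage;
  std::cout << read<int>(r) << " " << read<float>(r) << std::endl;  // 42 2.5
  return 0;
}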

View File

@@ -134,9 +134,9 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
int calibration_cache_size = 0; int calibration_cache_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(), if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
&model_content_ptr, &model_content_size, 11, true, &model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, ops.data(), verbose, true, true, true, ops.data(), 1, "tensorrt",
1, "tensorrt", &calibration_cache_ptr, &calibration_cache_size, "",
&calibration_cache_ptr, &calibration_cache_size, "", &save_external_)) { &save_external_)) {
FDERROR << "Error occured while export PaddlePaddle to ONNX format." FDERROR << "Error occured while export PaddlePaddle to ONNX format."
<< std::endl; << std::endl;
return false; return false;
@@ -215,13 +215,14 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
outputs_desc_.resize(onnx_reader.num_outputs); outputs_desc_.resize(onnx_reader.num_outputs);
for (int i = 0; i < onnx_reader.num_inputs; ++i) { for (int i = 0; i < onnx_reader.num_inputs; ++i) {
std::string name(onnx_reader.inputs[i].name); std::string name(onnx_reader.inputs[i].name);
std::vector<int64_t> shape( std::vector<int64_t> shape(onnx_reader.inputs[i].shape,
onnx_reader.inputs[i].shape, onnx_reader.inputs[i].shape +
onnx_reader.inputs[i].shape + onnx_reader.inputs[i].rank); onnx_reader.inputs[i].rank);
inputs_desc_[i].name = name; inputs_desc_[i].name = name;
inputs_desc_[i].shape.assign(shape.begin(), shape.end()); inputs_desc_[i].shape.assign(shape.begin(), shape.end());
inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype); inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype);
inputs_desc_[i].original_dtype = ReaderDtypeToFDDtype(onnx_reader.inputs[i].dtype); inputs_desc_[i].original_dtype =
ReaderDtypeToFDDtype(onnx_reader.inputs[i].dtype);
auto info = ShapeRangeInfo(shape); auto info = ShapeRangeInfo(shape);
info.name = name; info.name = name;
auto iter_min = option.min_shape.find(name); auto iter_min = option.min_shape.find(name);
@@ -237,9 +238,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
for (int i = 0; i < onnx_reader.num_outputs; ++i) { for (int i = 0; i < onnx_reader.num_outputs; ++i) {
std::string name(onnx_reader.outputs[i].name); std::string name(onnx_reader.outputs[i].name);
std::vector<int64_t> shape( std::vector<int64_t> shape(onnx_reader.outputs[i].shape,
onnx_reader.outputs[i].shape, onnx_reader.outputs[i].shape +
onnx_reader.outputs[i].shape + onnx_reader.outputs[i].rank); onnx_reader.outputs[i].rank);
outputs_desc_[i].name = name; outputs_desc_[i].name = name;
outputs_desc_[i].shape.assign(shape.begin(), shape.end()); outputs_desc_[i].shape.assign(shape.begin(), shape.end());
outputs_desc_[i].dtype = outputs_desc_[i].dtype =
@@ -283,8 +284,7 @@ int TrtBackend::ShapeRangeInfoUpdated(const std::vector<FDTensor>& inputs) {
} }
bool TrtBackend::Infer(std::vector<FDTensor>& inputs, bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs, std::vector<FDTensor>* outputs, bool copy_to_fd) {
bool copy_to_fd) {
if (inputs.size() != NumInputs()) { if (inputs.size() != NumInputs()) {
FDERROR << "Require " << NumInputs() << "inputs, but get " << inputs.size() FDERROR << "Require " << NumInputs() << "inputs, but get " << inputs.size()
<< "." << std::endl; << "." << std::endl;
@@ -297,7 +297,8 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
<< "TensorRT engine will be rebuilt once shape range information " << "TensorRT engine will be rebuilt once shape range information "
"changed, this may take lots of time, you can set a proper shape " "changed, this may take lots of time, you can set a proper shape "
"range before loading model to avoid rebuilding process. refer " "range before loading model to avoid rebuilding process. refer "
"https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/faq/" "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/"
"faq/"
"tensorrt_tricks.md for more details." "tensorrt_tricks.md for more details."
<< std::endl; << std::endl;
BuildTrtEngine(); BuildTrtEngine();
@@ -314,31 +315,35 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
  for (size_t i = 0; i < outputs->size(); ++i) {
    // if the final output tensor's dtype is different from the model output tensor's dtype,
    // then we need cast the data to the final output's dtype
    auto model_output_dtype =
        GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype());
    if ((*outputs)[i].dtype != model_output_dtype) {
      FDTensor output_tensor;
      output_tensor.SetExternalData(
          (*outputs)[i].shape, model_output_dtype,
          outputs_device_buffer_[(*outputs)[i].name].data(), Device::GPU);
      casted_output_tensors_[(*outputs)[i].name].Resize(
          (*outputs)[i].shape, (*outputs)[i].dtype, (*outputs)[i].name,
          Device::GPU);
      function::CudaCast(output_tensor,
                         &casted_output_tensors_[(*outputs)[i].name], stream_);
      if (!copy_to_fd) {
        (*outputs)[i].SetExternalData(
            (*outputs)[i].shape, model_output_dtype,
            casted_output_tensors_[(*outputs)[i].name].MutableData(),
            Device::GPU, option_.gpu_id);
      }
    } else {
      casted_output_tensors_[(*outputs)[i].name].SetExternalData(
          (*outputs)[i].shape, model_output_dtype,
          outputs_device_buffer_[(*outputs)[i].name].data(), Device::GPU);
    }
  }
  if (copy_to_fd) {
    for (size_t i = 0; i < outputs->size(); ++i) {
      FDASSERT(
          cudaMemcpyAsync((*outputs)[i].Data(),
                          casted_output_tensors_[(*outputs)[i].name].Data(),
                          (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
                          stream_) == 0,
@@ -356,10 +361,12 @@ void TrtBackend::GetInputOutputInfo() {
  std::unordered_map<std::string, FDDataType> inputs_original_dtype_map;
  std::unordered_map<std::string, FDDataType> outputs_original_dtype_map;
  for (size_t i = 0; i < inputs_desc_.size(); ++i) {
    inputs_original_dtype_map[inputs_desc_[i].name] =
        inputs_desc_[i].original_dtype;
  }
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    outputs_original_dtype_map[outputs_desc_[i].name] =
        outputs_desc_[i].original_dtype;
  }
  // Re-read the tensor infos from TRT model and write into inputs_desc_ and outputs_desc_
@@ -373,12 +380,18 @@ void TrtBackend::GetInputOutputInfo() {
    auto shape = ToVec(engine_->getBindingDimensions(i));
    auto dtype = engine_->getBindingDataType(i);
    if (engine_->bindingIsInput(i)) {
      auto original_dtype = inputs_original_dtype_map.count(name)
                                ? inputs_original_dtype_map[name]
                                : GetFDDataType(dtype);
      inputs_desc_.emplace_back(
          TrtValueInfo{name, shape, dtype, original_dtype});
      inputs_device_buffer_[name] = FDDeviceBuffer(dtype);
    } else {
      auto original_dtype = outputs_original_dtype_map.count(name)
                                ? outputs_original_dtype_map[name]
                                : GetFDDataType(dtype);
      outputs_desc_.emplace_back(
          TrtValueInfo{name, shape, dtype, original_dtype});
      outputs_device_buffer_[name] = FDDeviceBuffer(dtype);
      casted_output_tensors_[name] = FDTensor();
    }
@@ -391,7 +404,8 @@ void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
  for (const auto& item : inputs) {
    // auto idx = engine_->getBindingIndex(item.name.c_str());
    auto iter = io_name_index_.find(item.name);
    FDASSERT(iter != io_name_index_.end(),
             "TRTBackend SetInputs not find name:%s", item.name.c_str());
    auto idx = iter->second;
    std::vector<int> shape(item.shape.begin(), item.shape.end());
    auto dims = ToDims(shape);
@@ -424,9 +438,8 @@ void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
"Error occurs while copy memory from CPU to GPU."); "Error occurs while copy memory from CPU to GPU.");
} else { } else {
FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(), FDASSERT(cudaMemcpyAsync(inputs_device_buffer_[item.name].data(),
item.Data(), item.Data(), item.Nbytes(),
item.Nbytes(), cudaMemcpyHostToDevice, cudaMemcpyHostToDevice, stream_) == 0,
stream_) == 0,
"Error occurs while copy memory from CPU to GPU."); "Error occurs while copy memory from CPU to GPU.");
} }
} }
@@ -443,7 +456,9 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs,
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    // auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
    auto idx_iter = io_name_index_.find(outputs_desc_[i].name);
    FDASSERT(idx_iter != io_name_index_.end(),
             "TRTBackend Outputs not find name:%s",
             outputs_desc_[i].name.c_str());
    auto idx = idx_iter->second;
    auto output_dims = context_->getBindingDimensions(idx);
@@ -471,8 +486,7 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs,
    } else {
      (*outputs)[ori_idx].name = outputs_desc_[i].name;
      (*outputs)[ori_idx].SetExternalData(
          shape, outputs_desc_[i].original_dtype, bindings_[idx], Device::GPU,
          option_.gpu_id);
    }
  }
@@ -587,7 +601,8 @@ bool TrtBackend::BuildTrtEngine() {
  if (option_.serialize_file != "") {
    FDINFO << "Serialize TensorRTEngine to local file "
           << option_.serialize_file << "." << std::endl;
    std::ofstream engine_file(option_.serialize_file.c_str(),
                              std::ios::binary | std::ios::out);
    if (!engine_file) {
      FDERROR << "Failed to open " << option_.serialize_file << " to write."
              << std::endl;
@@ -631,7 +646,8 @@ bool TrtBackend::CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer) {
  if (save_external_) {
    model_parser = !parser_->parseFromFile(onnx_model_buffer.c_str(), 0);
  } else {
    model_parser =
        !parser_->parse(onnx_model_buffer.data(), onnx_model_buffer.size());
  }
  if (model_parser) {
    FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
@@ -665,7 +681,8 @@ bool TrtBackend::CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer) {
"should be noticed that FastDeploy will rebuild the engine while " "should be noticed that FastDeploy will rebuild the engine while "
"new input shape is out of the collected shape range, this may " "new input shape is out of the collected shape range, this may "
"bring some time consuming problem, refer " "bring some time consuming problem, refer "
"https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/faq/" "https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/"
"faq/"
"tensorrt_tricks.md for more details." "tensorrt_tricks.md for more details."
<< std::endl; << std::endl;
initialized_ = true; initialized_ = true;
@@ -732,16 +749,13 @@ std::unique_ptr<BaseBackend> TrtBackend::Clone(void *stream, int device_id) {
      FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
               "Clone model from ONNX failed while initialize TrtBackend.");
    } else {
      FDASSERT(casted_backend->InitFromPaddle(
                   option_.model_file, option_.params_file, clone_option),
               "Clone model from Paddle failed while initialize TrtBackend.");
    }
    FDWARNING << "The target device id:" << device_id
              << " is different from current device id:" << option_.gpu_id
              << ", cannot share memory with current engine." << std::endl;
    return new_backend;
  }
  cudaSetDevice(option_.gpu_id);
@@ -753,9 +767,12 @@ std::unique_ptr<BaseBackend> TrtBackend::Clone(void *stream, int device_id) {
"[ERROR] Error occurs while clone calling cudaStreamCreate()."); "[ERROR] Error occurs while clone calling cudaStreamCreate().");
} }
casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end()); casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end()); casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
casted_backend->outputs_order_.insert(outputs_order_.begin(), outputs_order_.end()); outputs_desc_.end());
casted_backend->shape_range_info_.insert(shape_range_info_.begin(), shape_range_info_.end()); casted_backend->outputs_order_.insert(outputs_order_.begin(),
outputs_order_.end());
casted_backend->shape_range_info_.insert(shape_range_info_.begin(),
shape_range_info_.end());
casted_backend->engine_ = engine_; casted_backend->engine_ = engine_;
casted_backend->context_ = std::shared_ptr<nvinfer1::IExecutionContext>( casted_backend->context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
casted_backend->engine_->createExecutionContext()); casted_backend->engine_->createExecutionContext());


@@ -97,8 +97,7 @@ class TrtBackend : public BaseBackend {
  bool InitFromOnnx(const std::string& model_file,
                    const TrtBackendOption& option = TrtBackendOption(),
                    bool from_memory_buffer = false);
  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
             bool copy_to_fd = true) override;
  int NumInputs() const { return inputs_desc_.size(); }


@@ -32,8 +32,7 @@
namespace fastdeploy {

struct FDInferDeleter {
  template <typename T> void operator()(T* obj) const {
    if (obj) {
      delete obj;
      // obj->destroy();
@@ -41,8 +40,7 @@ struct FDInferDeleter {
  }
};

template <typename T> using FDUniquePtr = std::unique_ptr<T, FDInferDeleter>;

int64_t Volume(const nvinfer1::Dims& d);
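As a hedged illustration of how the single-line alias above is typically used (this snippet is not part of the commit, and the helper function name is made up for the example), any raw TensorRT pointer can be handed to FDUniquePtr so FDInferDeleter releases it automatically:

#include <NvInfer.h>
#include <memory>

// Hypothetical helper, for illustration only: wrap a builder created from an
// existing TensorRT logger so it is deleted when the pointer goes out of scope.
inline FDUniquePtr<nvinfer1::IBuilder>
MakeOwnedBuilder(nvinfer1::ILogger& logger) {
  return FDUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger));
}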
@@ -72,17 +70,13 @@ std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
  return out;
}

template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
 public:
  //!
  //! \brief Construct an empty buffer.
  //!
  explicit FDGenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
      : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr),
        mExternal_buffer(nullptr) {}

  //!
@@ -104,9 +98,7 @@ class FDGenericBuffer {
  }

  FDGenericBuffer(FDGenericBuffer&& buf)
      : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
        mBuffer(buf.mBuffer) {
    buf.mSize = 0;
    buf.mCapacity = 0;
@@ -133,7 +125,8 @@ class FDGenericBuffer {
  //! \brief Returns pointer to underlying array.
  //!
  void* data() {
    if (mExternal_buffer != nullptr)
      return mExternal_buffer;
    return mBuffer;
  }
@@ -141,7 +134,8 @@ class FDGenericBuffer {
  //! \brief Returns pointer to underlying array.
  //!
  const void* data() const {
    if (mExternal_buffer != nullptr)
      return mExternal_buffer;
    return mBuffer;
  }
@@ -213,8 +207,8 @@ class FDGenericBuffer {
};

using FDDeviceBuffer = FDGenericBuffer<FDDeviceAllocator, FDDeviceFree>;
using FDDeviceHostBuffer =
    FDGenericBuffer<FDDeviceHostAllocator, FDDeviceHostFree>;

class FDTrtLogger : public nvinfer1::ILogger {
 public:


@@ -12,13 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/common/multiclass_nms.h"
+#include "fastdeploy/vision/detection/ppdet/multiclass_nms.h"
#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
-namespace backend {
+namespace vision {
+namespace detection {
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
                          const std::pair<float, T>& pair2) {
@@ -79,7 +80,7 @@ float JaccardOverlap(const float* box1, const float* box2,
  }
}
-void MultiClassNMS::FastNMS(const float* boxes, const float* scores,
+void PaddleMultiClassNMS::FastNMS(const float* boxes, const float* scores,
                                  const int& num_boxes,
                                  std::vector<int>* keep_indices) {
  std::vector<std::pair<float, int>> sorted_indices;
@@ -109,7 +110,7 @@ void MultiClassNMS::FastNMS(const float* boxes, const float* scores,
  }
}
-int MultiClassNMS::NMSForEachSample(
+int PaddleMultiClassNMS::NMSForEachSample(
    const float* boxes, const float* scores, int num_boxes, int num_classes,
    std::map<int, std::vector<int>>* keep_indices) {
  for (int i = 0; i < num_classes; ++i) {
@@ -152,7 +153,7 @@ int MultiClassNMS::NMSForEachSample(
  return num_det;
}
-void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data,
+void PaddleMultiClassNMS::Compute(const float* boxes_data, const float* scores_data,
                                  const std::vector<int64_t>& boxes_dim,
                                  const std::vector<int64_t>& scores_dim) {
  int score_size = scores_dim.size();
@@ -220,5 +221,6 @@ void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data,
    }
  }
}
-}  // namespace backend
+}  // namespace detection
+}  // namespace vision
}  // namespace fastdeploy


@@ -18,8 +18,9 @@
#include <vector>
namespace fastdeploy {
-namespace backend {
-struct MultiClassNMS {
+namespace vision {
+namespace detection {
+struct PaddleMultiClassNMS {
  int64_t background_label = -1;
  int64_t keep_top_k = -1;
  float nms_eta;
@@ -40,6 +41,6 @@ struct MultiClassNMS {
               const std::vector<int64_t>& boxes_dim,
               const std::vector<int64_t>& scores_dim);
};
-}  // namespace backend
+}  // namespace detection
+}  // namespace vision
}  // namespace fastdeploy
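A hedged usage sketch of the renamed helper (not part of the commit; the tensor pointers and extents are placeholders, and only the fields and the Compute() signature visible in this diff are used):

#include <vector>
#include "fastdeploy/vision/detection/ppdet/multiclass_nms.h"

void RunNmsExample(const float* boxes_data, const float* scores_data,
                   int64_t num_boxes, int64_t num_classes) {
  fastdeploy::vision::detection::PaddleMultiClassNMS nms;
  nms.background_label = -1;  // keep every class
  nms.keep_top_k = 100;       // cap the number of detections per image
  nms.nms_eta = 1.0;          // no adaptive threshold decay
  // The remaining thresholds (not shown in this hunk) would also need values.
  // Assumed layouts: boxes [batch, num_boxes, 4],
  //                  scores [batch, num_classes, num_boxes].
  nms.Compute(boxes_data, scores_data, {1, num_boxes, 4},
              {1, num_classes, num_boxes});
}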


@@ -13,6 +13,7 @@
// limitations under the License.
#include "fastdeploy/vision/detection/ppdet/postprocessor.h"
+#include "fastdeploy/vision/detection/ppdet/multiclass_nms.h"
#include "fastdeploy/vision/utils/utils.h"
namespace fastdeploy {
@@ -176,7 +177,7 @@ bool PaddleDetPostprocessor::ProcessUnDecodeResults(
    return false;
  }
-  backend::MultiClassNMS nms;
+  PaddleMultiClassNMS nms;
  nms.background_label = -1;
  nms.keep_top_k = 100;
  nms.nms_eta = 1.0;