mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Other] Unify initialize api for lite/trt backend (#1249)
* Unify initialize api for lite/trt backend * Unify initialize api for lite/trt backend
This commit is contained in:
@@ -56,18 +56,39 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LiteBackend::InitFromPaddle(const std::string& model_file,
|
bool LiteBackend::Init(const RuntimeOption& runtime_option) {
|
||||||
const std::string& params_file,
|
|
||||||
const LiteBackendOption& option) {
|
|
||||||
if (initialized_) {
|
if (initialized_) {
|
||||||
FDERROR << "LiteBackend is already initialized, cannot initialize again."
|
FDERROR << "LiteBackend is already initialized, cannot initialize again."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
config_.set_model_file(model_file);
|
if (runtime_option.model_format != ModelFormat::PADDLE) {
|
||||||
config_.set_param_file(params_file);
|
FDERROR
|
||||||
BuildOption(option);
|
<< "PaddleLiteBackend only supports model format PADDLE, but now it's "
|
||||||
|
<< runtime_option.model_format << "." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (runtime_option.device != Device::CPU &&
|
||||||
|
runtime_option.device != Device::KUNLUNXIN &&
|
||||||
|
runtime_option.device != Device::ASCEND &&
|
||||||
|
runtime_option.device != Device::TIMVX) {
|
||||||
|
FDERROR << "PaddleLiteBackend only supports "
|
||||||
|
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND, "
|
||||||
|
"but now it's "
|
||||||
|
<< runtime_option.device << "." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (runtime_option.model_from_memory_) {
|
||||||
|
FDERROR << "PaddleLiteBackend doesn't support load model from memory, "
|
||||||
|
"please load model from disk."
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
config_.set_model_file(runtime_option.model_file);
|
||||||
|
config_.set_param_file(runtime_option.params_file);
|
||||||
|
BuildOption(runtime_option.paddle_lite_option);
|
||||||
predictor_ =
|
predictor_ =
|
||||||
paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::CxxConfig>(
|
paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::CxxConfig>(
|
||||||
config_);
|
config_);
|
||||||
|
@@ -22,6 +22,7 @@
|
|||||||
#include "paddle_api.h" // NOLINT
|
#include "paddle_api.h" // NOLINT
|
||||||
|
|
||||||
#include "fastdeploy/runtime/backends/backend.h"
|
#include "fastdeploy/runtime/backends/backend.h"
|
||||||
|
#include "fastdeploy/runtime/runtime_option.h"
|
||||||
#include "fastdeploy/runtime/backends/lite/option.h"
|
#include "fastdeploy/runtime/backends/lite/option.h"
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
@@ -30,11 +31,8 @@ class LiteBackend : public BaseBackend {
|
|||||||
public:
|
public:
|
||||||
LiteBackend() {}
|
LiteBackend() {}
|
||||||
virtual ~LiteBackend() = default;
|
virtual ~LiteBackend() = default;
|
||||||
void BuildOption(const LiteBackendOption& option);
|
|
||||||
|
|
||||||
bool InitFromPaddle(const std::string& model_file,
|
bool Init(const RuntimeOption& option);
|
||||||
const std::string& params_file,
|
|
||||||
const LiteBackendOption& option = LiteBackendOption());
|
|
||||||
|
|
||||||
bool Infer(std::vector<FDTensor>& inputs,
|
bool Infer(std::vector<FDTensor>& inputs,
|
||||||
std::vector<FDTensor>* outputs,
|
std::vector<FDTensor>* outputs,
|
||||||
@@ -50,6 +48,8 @@ class LiteBackend : public BaseBackend {
|
|||||||
std::vector<TensorInfo> GetOutputInfos() override;
|
std::vector<TensorInfo> GetOutputInfos() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void BuildOption(const LiteBackendOption& option);
|
||||||
|
|
||||||
void ConfigureCpu(const LiteBackendOption& option);
|
void ConfigureCpu(const LiteBackendOption& option);
|
||||||
void ConfigureTimvx(const LiteBackendOption& option);
|
void ConfigureTimvx(const LiteBackendOption& option);
|
||||||
void ConfigureAscend(const LiteBackendOption& option);
|
void ConfigureAscend(const LiteBackendOption& option);
|
||||||
|
@@ -113,6 +113,50 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TrtBackend::Init(const RuntimeOption& runtime_option) {
|
||||||
|
if (runtime_option.device != Device::GPU) {
|
||||||
|
FDERROR << "TrtBackend only supports Device::GPU, but now it's "
|
||||||
|
<< runtime_option.device << "." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (runtime_option.model_format != ModelFormat::PADDLE &&
|
||||||
|
runtime_option.model_format != ModelFormat::ONNX) {
|
||||||
|
FDERROR
|
||||||
|
<< "TrtBackend only supports model format PADDLE/ONNX, but now it's "
|
||||||
|
<< runtime_option.model_format << "." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (runtime_option.model_format == ModelFormat::PADDLE) {
|
||||||
|
if (runtime_option.model_from_memory_) {
|
||||||
|
return InitFromPaddle(runtime_option.model_file,
|
||||||
|
runtime_option.params_file,
|
||||||
|
runtime_option.trt_option);
|
||||||
|
} else {
|
||||||
|
std::string model_buffer;
|
||||||
|
std::string params_buffer;
|
||||||
|
FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
|
||||||
|
"Failed to read model file %s.",
|
||||||
|
runtime_option.model_file.c_str());
|
||||||
|
FDASSERT(ReadBinaryFromFile(runtime_option.params_file, &params_buffer),
|
||||||
|
"Failed to read parameters file %s.",
|
||||||
|
runtime_option.params_file.c_str());
|
||||||
|
return InitFromPaddle(model_buffer, params_buffer,
|
||||||
|
runtime_option.trt_option);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (runtime_option.model_from_memory_) {
|
||||||
|
return InitFromOnnx(runtime_option.model_file, runtime_option.trt_option);
|
||||||
|
} else {
|
||||||
|
std::string model_buffer;
|
||||||
|
FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
|
||||||
|
"Failed to read model file %s.",
|
||||||
|
runtime_option.model_file.c_str());
|
||||||
|
return InitFromOnnx(model_buffer, runtime_option.trt_option);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool TrtBackend::InitFromPaddle(const std::string& model_buffer,
|
bool TrtBackend::InitFromPaddle(const std::string& model_buffer,
|
||||||
const std::string& params_buffer,
|
const std::string& params_buffer,
|
||||||
const TrtBackendOption& option, bool verbose) {
|
const TrtBackendOption& option, bool verbose) {
|
||||||
|
@@ -70,14 +70,8 @@ FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
|
|||||||
class TrtBackend : public BaseBackend {
|
class TrtBackend : public BaseBackend {
|
||||||
public:
|
public:
|
||||||
TrtBackend() : engine_(nullptr), context_(nullptr) {}
|
TrtBackend() : engine_(nullptr), context_(nullptr) {}
|
||||||
void BuildOption(const TrtBackendOption& option);
|
|
||||||
|
|
||||||
bool InitFromPaddle(const std::string& model_buffer,
|
bool Init(const RuntimeOption& runtime_option);
|
||||||
const std::string& params_buffer,
|
|
||||||
const TrtBackendOption& option = TrtBackendOption(),
|
|
||||||
bool verbose = false);
|
|
||||||
bool InitFromOnnx(const std::string& model_buffer,
|
|
||||||
const TrtBackendOption& option = TrtBackendOption());
|
|
||||||
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
|
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
|
||||||
bool copy_to_fd = true) override;
|
bool copy_to_fd = true) override;
|
||||||
|
|
||||||
@@ -98,6 +92,15 @@ class TrtBackend : public BaseBackend {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void BuildOption(const TrtBackendOption& option);
|
||||||
|
|
||||||
|
bool InitFromPaddle(const std::string& model_buffer,
|
||||||
|
const std::string& params_buffer,
|
||||||
|
const TrtBackendOption& option = TrtBackendOption(),
|
||||||
|
bool verbose = false);
|
||||||
|
bool InitFromOnnx(const std::string& model_buffer,
|
||||||
|
const TrtBackendOption& option = TrtBackendOption());
|
||||||
|
|
||||||
TrtBackendOption option_;
|
TrtBackendOption option_;
|
||||||
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
|
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
|
||||||
std::shared_ptr<nvinfer1::IExecutionContext> context_;
|
std::shared_ptr<nvinfer1::IExecutionContext> context_;
|
||||||
|
@@ -324,12 +324,6 @@ void Runtime::CreateOrtBackend() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Runtime::CreateTrtBackend() {
|
void Runtime::CreateTrtBackend() {
|
||||||
FDASSERT(option.device == Device::GPU,
|
|
||||||
"Backend::TRT only supports Device::GPU.");
|
|
||||||
FDASSERT(option.model_format == ModelFormat::PADDLE ||
|
|
||||||
option.model_format == ModelFormat::ONNX,
|
|
||||||
"TrtBackend only support model format of ModelFormat::PADDLE / "
|
|
||||||
"ModelFormat::ONNX.");
|
|
||||||
#ifdef ENABLE_TRT_BACKEND
|
#ifdef ENABLE_TRT_BACKEND
|
||||||
option.trt_option.model_file = option.model_file;
|
option.trt_option.model_file = option.model_file;
|
||||||
option.trt_option.params_file = option.params_file;
|
option.trt_option.params_file = option.params_file;
|
||||||
@@ -338,40 +332,8 @@ void Runtime::CreateTrtBackend() {
|
|||||||
option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
|
option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
|
||||||
option.trt_option.external_stream_ = option.external_stream_;
|
option.trt_option.external_stream_ = option.external_stream_;
|
||||||
backend_ = utils::make_unique<TrtBackend>();
|
backend_ = utils::make_unique<TrtBackend>();
|
||||||
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
|
backend_->benchmark_option_ = option.benchmark_option;
|
||||||
casted_backend->benchmark_option_ = option.benchmark_option;
|
FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
|
||||||
|
|
||||||
if (option.model_format == ModelFormat::ONNX) {
|
|
||||||
if (option.model_from_memory_) {
|
|
||||||
FDASSERT(
|
|
||||||
casted_backend->InitFromOnnx(option.model_file, option.trt_option),
|
|
||||||
"Load model from ONNX failed while initliazing TrtBackend.");
|
|
||||||
ReleaseModelMemoryBuffer();
|
|
||||||
} else {
|
|
||||||
std::string model_buffer = "";
|
|
||||||
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
|
|
||||||
"Fail to read binary from model file");
|
|
||||||
FDASSERT(casted_backend->InitFromOnnx(model_buffer, option.trt_option),
|
|
||||||
"Load model from ONNX failed while initliazing TrtBackend.");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (option.model_from_memory_) {
|
|
||||||
FDASSERT(casted_backend->InitFromPaddle(
|
|
||||||
option.model_file, option.params_file, option.trt_option),
|
|
||||||
"Load model from Paddle failed while initliazing TrtBackend.");
|
|
||||||
ReleaseModelMemoryBuffer();
|
|
||||||
} else {
|
|
||||||
std::string model_buffer = "";
|
|
||||||
std::string params_buffer = "";
|
|
||||||
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
|
|
||||||
"Fail to read binary from model file");
|
|
||||||
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
|
|
||||||
"Fail to read binary from parameter file");
|
|
||||||
FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
|
|
||||||
option.trt_option),
|
|
||||||
"Load model from Paddle failed while initliazing TrtBackend.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
FDASSERT(false,
|
FDASSERT(false,
|
||||||
"TrtBackend is not available, please compiled with "
|
"TrtBackend is not available, please compiled with "
|
||||||
@@ -383,29 +345,18 @@ void Runtime::CreateTrtBackend() {
|
|||||||
|
|
||||||
void Runtime::CreateLiteBackend() {
|
void Runtime::CreateLiteBackend() {
|
||||||
#ifdef ENABLE_LITE_BACKEND
|
#ifdef ENABLE_LITE_BACKEND
|
||||||
FDASSERT(option.model_from_memory_ == false,
|
|
||||||
"LiteBackend don't support to load model from memory");
|
|
||||||
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
|
|
||||||
option.device == Device::KUNLUNXIN ||
|
|
||||||
option.device == Device::ASCEND,
|
|
||||||
"Backend::LITE only supports "
|
|
||||||
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
|
|
||||||
FDASSERT(option.model_format == ModelFormat::PADDLE,
|
|
||||||
"LiteBackend only support model format of ModelFormat::PADDLE");
|
|
||||||
backend_ = utils::make_unique<LiteBackend>();
|
backend_ = utils::make_unique<LiteBackend>();
|
||||||
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
|
backend_->benchmark_option_ = option.benchmark_option;
|
||||||
casted_backend->benchmark_option_ = option.benchmark_option;
|
|
||||||
|
|
||||||
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
|
FDASSERT(backend_->Init(option),
|
||||||
option.paddle_lite_option),
|
|
||||||
"Load model from nb file failed while initializing LiteBackend.");
|
"Load model from nb file failed while initializing LiteBackend.");
|
||||||
#else
|
#else
|
||||||
FDASSERT(false,
|
FDASSERT(false,
|
||||||
"LiteBackend is not available, please compiled with "
|
"LiteBackend is not available, please compiled with "
|
||||||
"ENABLE_LITE_BACKEND=ON.");
|
"ENABLE_LITE_BACKEND=ON.");
|
||||||
#endif
|
#endif
|
||||||
FDINFO << "Runtime initialized with Backend::LITE in " << option.device << "."
|
FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device
|
||||||
<< std::endl;
|
<< "." << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Runtime::CreateRKNPU2Backend() {
|
void Runtime::CreateRKNPU2Backend() {
|
||||||
|
Reference in New Issue
Block a user