[Other] FastDeploy TensorRT && ONNX backends support loading models from memory (#1130)

* Update all backends to load models from a memory buffer

* Delete redundant code

* Format code style

* Format code style

* Delete redundant code

* Delete redundant code

* Add some FDASSERTs

* Update load model from memory when cloning engine

* Update clone engine code

* Update set_model_buffer API parameters to char pointers

* Release memory buffer variables after backend initialization finishes

* Fix conflict

* Fix bug
huangjianhui
2023-02-01 11:36:09 +08:00
committed by GitHub
parent 5b7728e898
commit 76df90afc3
17 changed files with 201 additions and 154 deletions
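
For orientation, here is a minimal sketch (not part of the commit) of the workflow it enables: read the model and parameter files into `std::string` buffers and hand them to the runtime through the new string-based `SetModelBuffer` API. File names, the helper function, and the ONNX Runtime backend choice are placeholders.

```cpp
// Sketch only: load a Paddle model from memory with the new API.
#include <fstream>
#include <sstream>
#include <string>

#include "fastdeploy/runtime.h"

static std::string ReadFileToString(const std::string& path) {
  std::ifstream fin(path, std::ios::binary);
  std::ostringstream ss;
  ss << fin.rdbuf();
  return ss.str();
}

int main() {
  std::string model_buffer = ReadFileToString("model.pdmodel");
  std::string params_buffer = ReadFileToString("model.pdiparams");

  fastdeploy::RuntimeOption option;
  // New signature: buffers as std::string, sizes carried implicitly.
  option.SetModelBuffer(model_buffer, params_buffer,
                        fastdeploy::ModelFormat::PADDLE);
  option.UseOrtBackend();  // TensorRT and Paddle Inference also accept buffers

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}
```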


@@ -84,8 +84,6 @@ void BindRuntime(pybind11::module& m) {
       .def_readwrite("backend", &RuntimeOption::backend)
       .def_readwrite("external_stream", &RuntimeOption::external_stream_)
       .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
-      .def_readwrite("model_buffer_size", &RuntimeOption::model_buffer_size_)
-      .def_readwrite("params_buffer_size", &RuntimeOption::params_buffer_size_)
       .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
       .def_readwrite("device_id", &RuntimeOption::device_id)
       .def_readwrite("device", &RuntimeOption::device)


@@ -21,6 +21,7 @@
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/core/fd_type.h"
+#include "fastdeploy/runtime/runtime_option.h"
 
 namespace fastdeploy {
@@ -76,7 +77,8 @@ class BaseBackend {
   // Optional: For those backends which can share memory
   // while creating multiple inference engines with same model file
-  virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
+                                             void *stream = nullptr,
                                              int device_id = -1) {
     FDERROR << "Clone no support" << std::endl;
     return nullptr;

fastdeploy/runtime/backends/openvino/ov_backend.cc (Executable file → Normal file)

@@ -237,7 +237,6 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
   option_ = option;
   std::shared_ptr<ov::Model> model = core_.read_model(model_file);
-
   if (option_.shape_infos.size() > 0) {
     std::map<std::string, ov::PartialShape> shape_infos;
     for (const auto& item : option_.shape_infos) {
@@ -380,8 +379,8 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
-std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void* stream,
-                                                    int device_id) {
+std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(
+    RuntimeOption& runtime_option, void* stream, int device_id) {
   std::unique_ptr<BaseBackend> new_backend =
       utils::make_unique<OpenVINOBackend>();
   auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());


@@ -52,7 +52,8 @@ class OpenVINOBackend : public BaseBackend {
   std::vector<TensorInfo> GetInputInfos() override;
   std::vector<TensorInfo> GetOutputInfos() override;
 
-  std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
+  std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
+                                     void* stream = nullptr,
                                      int device_id = -1) override;
 
  private:


@@ -73,8 +73,8 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
   }
 }
 
-bool OrtBackend::InitFromPaddle(const std::string& model_file,
-                                const std::string& params_file,
+bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
+                                const std::string& params_buffer,
                                 const OrtBackendOption& option, bool verbose) {
   if (initialized_) {
     FDERROR << "OrtBackend is already initlized, cannot initialize again."
@@ -92,7 +92,8 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
   strcpy(ops[1].op_name, "pool2d");
   strcpy(ops[1].export_op_name, "AdaptivePool2d");
-  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
+  if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
+                           params_buffer.c_str(), params_buffer.size(),
                            &model_content_ptr, &model_content_size, 11, true,
                            verbose, true, true, true, ops.data(), 2,
                            "onnxruntime", nullptr, 0, "", &save_external)) {
@@ -112,9 +113,8 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
                 model_file_name.c_str());
     f << onnx_model_proto;
     f.close();
-    return InitFromOnnx(model_file_name, option, false);
   }
-  return InitFromOnnx(onnx_model_proto, option, true);
+  return InitFromOnnx(onnx_model_proto, option);
 #else
   FDERROR << "Didn't compile with PaddlePaddle Frontend, you can try to "
              "call `InitFromOnnx` instead."
@@ -124,8 +124,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
 }
 
 bool OrtBackend::InitFromOnnx(const std::string& model_file,
-                              const OrtBackendOption& option,
-                              bool from_memory_buffer) {
+                              const OrtBackendOption& option) {
   if (initialized_) {
     FDERROR << "OrtBackend is already initlized, cannot initialize again."
             << std::endl;
@@ -134,17 +133,7 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
   BuildOption(option);
   InitCustomOperators();
-  if (from_memory_buffer) {
-    session_ = {env_, model_file.data(), model_file.size(), session_options_};
-  } else {
-#ifdef _WIN32
-    session_ = {env_,
-                std::wstring(model_file.begin(), model_file.end()).c_str(),
-                session_options_};
-#else
-    session_ = {env_, model_file.c_str(), session_options_};
-#endif
-  }
+  session_ = {env_, model_file.data(), model_file.size(), session_options_};
 
   binding_ = std::make_shared<Ort::IoBinding>(session_);
   Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);


@@ -39,14 +39,13 @@ class OrtBackend : public BaseBackend {
   void BuildOption(const OrtBackendOption& option);
 
-  bool InitFromPaddle(const std::string& model_file,
-                      const std::string& params_file,
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
                       const OrtBackendOption& option = OrtBackendOption(),
                       bool verbose = false);
 
-  bool InitFromOnnx(const std::string& model_file,
-                    const OrtBackendOption& option = OrtBackendOption(),
-                    bool from_memory_buffer = false);
+  bool InitFromOnnx(const std::string& model_buffer,
+                    const OrtBackendOption& option = OrtBackendOption());
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;


@@ -39,10 +39,7 @@ struct PaddleBackendOption {
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
-  std::string model_buffer_ = "";  // load model and paramters from memory
-  std::string params_buffer_ = "";
-  size_t model_buffer_size_ = 0;
-  size_t params_buffer_size_ = 0;
   bool model_from_memory_ = false;
 
 #ifdef WITH_GPU


@@ -89,9 +89,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
   }
 }
 
-bool PaddleBackend::InitFromPaddle(const std::string& model_file,
-                                   const std::string& params_file,
+bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
+                                   const std::string& params_buffer,
                                    const PaddleBackendOption& option) {
+  // bool PaddleBackend::InitFromPaddle(const std::string& contents) {
   if (initialized_) {
     FDERROR << "PaddleBackend is already initlized, cannot initialize again."
             << std::endl;
@@ -102,16 +103,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
   // PaddleReader instead now
   std::string contents;
-  if (option.model_from_memory_) {
-    config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_,
-                           params_file.c_str(), option.params_buffer_size_);
-    contents = model_file;
-  } else {
-    config_.SetModel(model_file, params_file);
-    if (!ReadBinaryFromFile(model_file, &contents)) {
-      return false;
-    }
-  }
+  config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
+                         params_buffer.c_str(), params_buffer.size());
+  contents = model_buffer;
   config_.EnableMemoryOptim();
   BuildOption(option);
   auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
@@ -172,20 +166,16 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
     // Set the shape info file.
     std::string curr_model_dir = "./";
     if (!option.model_from_memory_) {
-      curr_model_dir = GetDirFromPath(model_file);
+      curr_model_dir = GetDirFromPath(option.model_file);
     }
     std::string shape_range_info =
         PathJoin(curr_model_dir, "shape_range_info.pbtxt");
     if (!CheckFileExists(shape_range_info)) {
       FDINFO << "Start generating shape range info file." << std::endl;
       paddle_infer::Config analysis_config;
-      if (option.model_from_memory_) {
-        analysis_config.SetModelBuffer(
-            model_file.c_str(), option.model_buffer_size_, params_file.c_str(),
-            option.params_buffer_size_);
-      } else {
-        analysis_config.SetModel(model_file, params_file);
-      }
+      analysis_config.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
+                                     params_buffer.c_str(),
+                                     params_buffer.size());
       analysis_config.CollectShapeRangeInfo(shape_range_info);
       auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
       std::map<std::string, std::vector<int>> max_shape;
@@ -258,7 +248,8 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
-std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
+std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption& runtime_option,
+                                                  void* stream, int device_id) {
   std::unique_ptr<BaseBackend> new_backend =
       utils::make_unique<PaddleBackend>();
   auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
@@ -266,8 +257,27 @@ std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
     auto clone_option = option_;
     clone_option.gpu_id = device_id;
     clone_option.external_stream_ = stream;
-    casted_backend->InitFromPaddle(clone_option.model_file,
-                                   clone_option.params_file, clone_option);
+    if (runtime_option.model_from_memory_) {
+      FDASSERT(
+          casted_backend->InitFromPaddle(runtime_option.model_buffer_,
+                                         runtime_option.params_buffer_,
+                                         clone_option),
+          "Clone model from Paddle failed while initialize PaddleBackend.");
+    } else {
+      std::string model_buffer = "";
+      std::string params_buffer = "";
+      FDASSERT(
+          ReadBinaryFromFile(clone_option.model_file, &model_buffer),
+          "Fail to read binary from model file while cloning PaddleBackend");
+      FDASSERT(ReadBinaryFromFile(clone_option.params_file, &params_buffer),
+               "Fail to read binary from parameter file while cloning "
+               "PaddleBackend");
+      FDASSERT(
+          casted_backend->InitFromPaddle(model_buffer, params_buffer,
+                                         clone_option),
+          "Clone model from Paddle failed while initialize PaddleBackend.");
+    }
     FDWARNING << "The target device id:" << device_id
               << " is different from current device id:" << option_.gpu_id
               << ", cannot share memory with current engine." << std::endl;


@@ -53,8 +53,8 @@ class PaddleBackend : public BaseBackend {
   virtual ~PaddleBackend() = default;
   void BuildOption(const PaddleBackendOption& option);
 
-  bool
-  InitFromPaddle(const std::string& model_file, const std::string& params_file,
-                 const PaddleBackendOption& option = PaddleBackendOption());
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const PaddleBackendOption& option = PaddleBackendOption());
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
@@ -64,7 +64,8 @@ class PaddleBackend : public BaseBackend {
   int NumOutputs() const override { return outputs_desc_.size(); }
 
-  std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
+  std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
+                                     void* stream = nullptr,
                                      int device_id = -1) override;
 
   TensorInfo GetInputInfo(int index) override;


@@ -24,6 +24,7 @@ namespace fastdeploy {
 struct TrtBackendOption {
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
+
   // format of input model
   ModelFormat model_format = ModelFormat::AUTOREC;


@@ -113,8 +113,8 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
   return true;
 }
 
-bool TrtBackend::InitFromPaddle(const std::string& model_file,
-                                const std::string& params_file,
+bool TrtBackend::InitFromPaddle(const std::string& model_buffer,
+                                const std::string& params_buffer,
                                 const TrtBackendOption& option, bool verbose) {
   if (initialized_) {
     FDERROR << "TrtBackend is already initlized, cannot initialize again."
@@ -132,7 +132,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
   int model_content_size = 0;
   char* calibration_cache_ptr;
   int calibration_cache_size = 0;
-  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
+  if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
+                           params_buffer.c_str(), params_buffer.size(),
                            &model_content_ptr, &model_content_size, 11, true,
                            verbose, true, true, true, ops.data(), 1, "tensorrt",
                            &calibration_cache_ptr, &calibration_cache_size, "",
@@ -141,7 +142,6 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
             << std::endl;
     return false;
   }
-
   std::string onnx_model_proto(model_content_ptr,
                                model_content_ptr + model_content_size);
   delete[] model_content_ptr;
@@ -159,9 +159,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
                 model_file_name_.c_str());
     f << onnx_model_proto;
     f.close();
-    return InitFromOnnx(model_file_name_, option, false);
   }
-  return InitFromOnnx(onnx_model_proto, option, true);
+  return InitFromOnnx(onnx_model_proto, option);
 #else
   FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
              "call `InitFromOnnx` instead."
@@ -170,9 +169,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
 #endif
 }
 
-bool TrtBackend::InitFromOnnx(const std::string& model_file,
-                              const TrtBackendOption& option,
-                              bool from_memory_buffer) {
+bool TrtBackend::InitFromOnnx(const std::string& model_buffer,
+                              const TrtBackendOption& option) {
   if (initialized_) {
     FDERROR << "TrtBackend is already initlized, cannot initialize again."
             << std::endl;
@@ -181,22 +179,7 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
   option_ = option;
   cudaSetDevice(option_.gpu_id);
 
-  std::string onnx_content = "";
-  if (!from_memory_buffer) {
-    std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
-    if (!fin) {
-      FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
-              << std::endl;
-      return false;
-    }
-    fin.seekg(0, std::ios::end);
-    onnx_content.resize(fin.tellg());
-    fin.seekg(0, std::ios::beg);
-    fin.read(&(onnx_content.at(0)), onnx_content.size());
-    fin.close();
-  } else {
-    onnx_content = model_file;
-  }
+  std::string onnx_content = model_buffer;
 
   // This part of code will record the original outputs order
   // because the converted tensorrt network may exist wrong order of outputs
@@ -739,21 +722,41 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
   return infos;
 }
 
-std::unique_ptr<BaseBackend> TrtBackend::Clone(void* stream, int device_id) {
+std::unique_ptr<BaseBackend> TrtBackend::Clone(RuntimeOption& runtime_option,
+                                               void* stream, int device_id) {
   std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
   auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
   if (device_id > 0 && device_id != option_.gpu_id) {
     auto clone_option = option_;
     clone_option.gpu_id = device_id;
     clone_option.external_stream_ = stream;
-    if (option_.model_format == ModelFormat::ONNX) {
-      FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
-               "Clone model from ONNX failed while initialize TrtBackend.");
-    } else {
-      FDASSERT(casted_backend->InitFromPaddle(
-                   option_.model_file, option_.params_file, clone_option),
-               "Clone model from Paddle failed while initialize TrtBackend.");
-    }
+    if (runtime_option.model_from_memory_) {
+      FDASSERT(casted_backend->InitFromPaddle(runtime_option.model_buffer_,
+                                              runtime_option.params_buffer_,
+                                              clone_option),
+               "Clone model from Paddle failed while initialize TrtBackend.");
+    } else {
+      if (option_.model_format == ModelFormat::ONNX) {
+        std::string model_buffer = "";
+        FDASSERT(
+            ReadBinaryFromFile(clone_option.model_file, &model_buffer),
+            "Fail to read binary from model file while cloning TrtBackend");
+        FDASSERT(casted_backend->InitFromOnnx(model_buffer, clone_option),
+                 "Clone model from ONNX failed while initialize TrtBackend.");
+      } else {
+        std::string model_buffer = "";
+        std::string params_buffer = "";
+        FDASSERT(
+            ReadBinaryFromFile(clone_option.model_file, &model_buffer),
+            "Fail to read binary from model file while cloning TrtBackend");
+        FDASSERT(
+            ReadBinaryFromFile(clone_option.params_file, &params_buffer),
+            "Fail to read binary from parameter file while cloning TrtBackend");
+        FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
+                                                clone_option),
+                 "Clone model from Paddle failed while initialize TrtBackend.");
+      }
+    }
     FDWARNING << "The target device id:" << device_id
               << " is different from current device id:" << option_.gpu_id
               << ", cannot share memory with current engine." << std::endl;


@@ -72,13 +72,12 @@ class TrtBackend : public BaseBackend {
   TrtBackend() : engine_(nullptr), context_(nullptr) {}
   void BuildOption(const TrtBackendOption& option);
 
-  bool InitFromPaddle(const std::string& model_file,
-                      const std::string& params_file,
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
                       const TrtBackendOption& option = TrtBackendOption(),
                       bool verbose = false);
-  bool InitFromOnnx(const std::string& model_file,
-                    const TrtBackendOption& option = TrtBackendOption(),
-                    bool from_memory_buffer = false);
+  bool InitFromOnnx(const std::string& model_buffer,
+                    const TrtBackendOption& option = TrtBackendOption());
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
@@ -88,7 +87,8 @@ class TrtBackend : public BaseBackend {
   TensorInfo GetOutputInfo(int index);
   std::vector<TensorInfo> GetInputInfos() override;
   std::vector<TensorInfo> GetOutputInfos() override;
-  std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
+  std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
+                                     void* stream = nullptr,
                                      int device_id = -1) override;
 
   ~TrtBackend() {


@@ -208,6 +208,15 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
   return nullptr;
 }
 
+void Runtime::ReleaseModelMemoryBuffer() {
+  if (option.model_from_memory_) {
+    option.model_buffer_.clear();
+    option.model_buffer_.shrink_to_fit();
+    option.params_buffer_.clear();
+    option.params_buffer_.shrink_to_fit();
+  }
+}
+
 void Runtime::CreatePaddleBackend() {
   FDASSERT(
       option.device == Device::CPU || option.device == Device::GPU ||
@@ -231,12 +240,6 @@ void Runtime::CreatePaddleBackend() {
   pd_option.enable_pinned_memory = option.enable_pinned_memory;
   pd_option.external_stream_ = option.external_stream_;
   pd_option.model_from_memory_ = option.model_from_memory_;
-  if (pd_option.model_from_memory_) {
-    pd_option.model_buffer_ = option.model_buffer_;
-    pd_option.params_buffer_ = option.params_buffer_;
-    pd_option.model_buffer_size_ = option.model_buffer_size_;
-    pd_option.params_buffer_size_ = option.params_buffer_size_;
-  }
 #ifdef ENABLE_TRT_BACKEND
   if (pd_option.use_gpu && option.pd_enable_trt) {
     pd_option.enable_trt = true;
@@ -276,9 +279,16 @@ void Runtime::CreatePaddleBackend() {
     FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
                                             option.params_buffer_, pd_option),
              "Load model from Paddle failed while initliazing PaddleBackend.");
+    ReleaseModelMemoryBuffer();
   } else {
-    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                            option.params_file, pd_option),
+    std::string model_buffer = "";
+    std::string params_buffer = "";
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+             "Fail to read binary from model file");
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+             "Fail to read binary from parameter file");
+    FDASSERT(
+        casted_backend->InitFromPaddle(model_buffer, params_buffer, pd_option),
              "Load model from Paddle failed while initliazing PaddleBackend.");
   }
 #else
@@ -291,6 +301,10 @@ void Runtime::CreatePaddleBackend() {
 }
 
 void Runtime::CreateOpenVINOBackend() {
+  // TODO(huangjianhui) OpenVINO only supports to load ONNX format model from
+  // memory Temporarily disable this function
+  FDASSERT(option.model_from_memory_ == false,
+           "OpenVINOBackend don't support to load model from memory");
   FDASSERT(option.device == Device::CPU,
            "Backend::OPENVINO only supports Device::CPU");
   FDASSERT(option.model_format == ModelFormat::PADDLE ||
@@ -342,16 +356,37 @@ void Runtime::CreateOrtBackend() {
   ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
   ort_option.gpu_id = option.device_id;
   ort_option.external_stream_ = option.external_stream_;
 
   backend_ = utils::make_unique<OrtBackend>();
   auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
-    FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
-             "Load model from ONNX failed while initliazing OrtBackend.");
+    if (option.model_from_memory_) {
+      FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, ort_option),
+               "Load model from ONNX failed while initliazing OrtBackend.");
+      ReleaseModelMemoryBuffer();
+    } else {
+      std::string model_buffer = "";
+      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+               "Fail to read binary from model file");
+      FDASSERT(casted_backend->InitFromOnnx(model_buffer, ort_option),
+               "Load model from ONNX failed while initliazing OrtBackend.");
+    }
   } else {
-    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                            option.params_file, ort_option),
-             "Load model from Paddle failed while initliazing OrtBackend.");
+    if (option.model_from_memory_) {
+      FDASSERT(casted_backend->InitFromPaddle(
+                   option.model_buffer_, option.params_buffer_, ort_option),
+               "Load model from Paddle failed while initliazing OrtBackend.");
+      ReleaseModelMemoryBuffer();
+    } else {
+      std::string model_buffer = "";
+      std::string params_buffer = "";
+      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+               "Fail to read binary from model file");
+      FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+               "Fail to read binary from parameter file");
+      FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
+                                              ort_option),
+               "Load model from Paddle failed while initliazing OrtBackend.");
+    }
   }
 #else
   FDASSERT(false,
@@ -385,16 +420,37 @@ void Runtime::CreateTrtBackend() {
   trt_option.serialize_file = option.trt_serialize_file;
   trt_option.enable_pinned_memory = option.enable_pinned_memory;
   trt_option.external_stream_ = option.external_stream_;
 
   backend_ = utils::make_unique<TrtBackend>();
   auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
-    FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
-             "Load model from ONNX failed while initliazing TrtBackend.");
+    if (option.model_from_memory_) {
+      FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, trt_option),
+               "Load model from ONNX failed while initliazing TrtBackend.");
+      ReleaseModelMemoryBuffer();
+    } else {
+      std::string model_buffer = "";
+      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+               "Fail to read binary from model file");
+      FDASSERT(casted_backend->InitFromOnnx(model_buffer, trt_option),
+               "Load model from ONNX failed while initliazing TrtBackend.");
+    }
   } else {
-    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                            option.params_file, trt_option),
-             "Load model from Paddle failed while initliazing TrtBackend.");
+    if (option.model_from_memory_) {
+      FDASSERT(casted_backend->InitFromPaddle(
+                   option.model_buffer_, option.params_buffer_, trt_option),
+               "Load model from Paddle failed while initliazing TrtBackend.");
+      ReleaseModelMemoryBuffer();
+    } else {
+      std::string model_buffer = "";
+      std::string params_buffer = "";
+      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+               "Fail to read binary from model file");
+      FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+               "Fail to read binary from parameter file");
+      FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
+                                              trt_option),
+               "Load model from Paddle failed while initliazing TrtBackend.");
+    }
   }
 #else
   FDASSERT(false,
@@ -406,6 +462,9 @@ void Runtime::CreateTrtBackend() {
 }
 
 void Runtime::CreateLiteBackend() {
+#ifdef ENABLE_LITE_BACKEND
+  FDASSERT(option.model_from_memory_ == false,
+           "LiteBackend don't support to load model from memory");
   FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
                option.device == Device::KUNLUNXIN ||
                option.device == Device::ASCEND,
@@ -413,7 +472,6 @@ void Runtime::CreateLiteBackend() {
            "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
   FDASSERT(option.model_format == ModelFormat::PADDLE,
            "LiteBackend only support model format of ModelFormat::PADDLE");
-#ifdef ENABLE_LITE_BACKEND
   backend_ = utils::make_unique<LiteBackend>();
   auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
   FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -429,6 +487,8 @@ void Runtime::CreateLiteBackend() {
 }
 
 void Runtime::CreateRKNPU2Backend() {
+  FDASSERT(option.model_from_memory_ == false,
+           "RKNPU2Backend don't support to load model from memory");
   FDASSERT(option.device == Device::RKNPU,
            "Backend::RKNPU2 only supports Device::RKNPU2");
   FDASSERT(option.model_format == ModelFormat::RKNN,
@@ -451,11 +511,14 @@ void Runtime::CreateRKNPU2Backend() {
 }
 
 void Runtime::CreateSophgoNPUBackend() {
+#ifdef ENABLE_SOPHGO_BACKEND
+  auto sophgo_option = SophgoBackendOption();
+  FDASSERT(option.model_from_memory_ == false,
+           "SophgoBackend don't support to load model from memory");
   FDASSERT(option.device == Device::SOPHGOTPUD,
            "Backend::SOPHGO only supports Device::SOPHGO");
   FDASSERT(option.model_format == ModelFormat::SOPHGO,
            "SophgoBackend only support model format of ModelFormat::SOPHGO");
-#ifdef ENABLE_SOPHGO_BACKEND
   auto sophgo_option = SophgoBackendOption();
   backend_ = utils::make_unique<SophgoBackend>();
   auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
@@ -486,7 +549,7 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   FDINFO << "Runtime Clone with Backend:: " << option.backend << " in "
          << option.device << "." << std::endl;
   runtime->option = option;
-  runtime->backend_ = backend_->Clone(stream, device_id);
+  runtime->backend_ = backend_->Clone(option, stream, device_id);
 
   return runtime;
 }


@@ -83,6 +83,8 @@ struct FASTDEPLOY_DECL Runtime {
    */
   Runtime* Clone(void* stream = nullptr, int device_id = -1);
 
+  void ReleaseModelMemoryBuffer();
+
   RuntimeOption option;
 
   /** \brief Compile TorchScript Module, only for Poros backend


@@ -38,29 +38,21 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
   }
 }
 
-void RuntimeOption::SetModelBuffer(const char* model_buffer,
-                                   size_t model_buffer_size,
-                                   const char* params_buffer,
-                                   size_t params_buffer_size,
+void RuntimeOption::SetModelBuffer(const std::string& model_buffer,
+                                   const std::string& params_buffer,
                                    const ModelFormat& format) {
-  model_buffer_size_ = model_buffer_size;
-  params_buffer_size_ = params_buffer_size;
   model_from_memory_ = true;
   if (format == ModelFormat::PADDLE) {
-    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
-    params_buffer_ =
-        std::string(params_buffer, params_buffer + params_buffer_size);
+    model_buffer_ = model_buffer;
+    params_buffer_ = params_buffer;
     model_format = ModelFormat::PADDLE;
   } else if (format == ModelFormat::ONNX) {
-    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    model_buffer_ = model_buffer;
     model_format = ModelFormat::ONNX;
-  } else if (format == ModelFormat::TORCHSCRIPT) {
-    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
-    model_format = ModelFormat::TORCHSCRIPT;
   } else {
     FDASSERT(false,
              "The model format only can be "
-             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
+             "ModelFormat::PADDLE/ModelFormat::ONNX.");
   }
 }
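
For callers migrating to the new C++ signature, a small sketch of the call-site change (the wrapper function, variable names, and the header path reused from the include added in backend.h above are illustrative, not part of the commit):

```cpp
#include <string>

#include "fastdeploy/runtime/runtime_option.h"  // assumed public header

void ConfigureFromMemory(fastdeploy::RuntimeOption& option,
                         const std::string& model_buf,
                         const std::string& params_buf) {
  // Old overload (removed in this commit): raw pointers plus explicit sizes.
  // option.SetModelBuffer(model_buf.c_str(), model_buf.size(),
  //                       params_buf.c_str(), params_buf.size(),
  //                       fastdeploy::ModelFormat::PADDLE);

  // New overload: the std::string arguments carry their own sizes.
  option.SetModelBuffer(model_buf, params_buf,
                        fastdeploy::ModelFormat::PADDLE);
}
```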


@@ -50,14 +50,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
    *
-   * \param[in] model_buffer The memory buffer of model
-   * \param[in] model_buffer_size The size of the model data
-   * \param[in] params_buffer The memory buffer of the combined parameters file
-   * \param[in] params_buffer_size The size of the combined parameters data
+   * \param[in] model_buffer The string of model memory buffer
+   * \param[in] params_buffer The string of parameters memory buffer
    * \param[in] format Format of the loaded model
    */
-  void SetModelBuffer(const char* model_buffer, size_t model_buffer_size,
-                      const char* params_buffer, size_t params_buffer_size,
+  void SetModelBuffer(const std::string& model_buffer,
+                      const std::string& params_buffer = "",
                       const ModelFormat& format = ModelFormat::PADDLE);
 
   /// Use cpu to inference, the runtime will inference on CPU by default
@@ -431,8 +429,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string model_buffer_ = "";
   std::string params_buffer_ = "";
-  size_t model_buffer_size_ = 0;
-  size_t params_buffer_size_ = 0;
   bool model_from_memory_ = false;
 };


@@ -229,20 +229,14 @@ class RuntimeOption:
     def set_model_buffer(self,
                          model_buffer,
-                         model_buffer_size,
-                         params_buffer,
-                         params_buffer_size,
+                         params_buffer="",
                          model_format=ModelFormat.PADDLE):
         """Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
 
         :param model_buffer: (bytes)The memory buffer of model
-        :param model_buffer_size: (unsigned int)The size of the model data.
-        :param params_buffer: (bytes)The memory buffer of the combined parameters file
-        :param params_buffer_size: (unsigned inst)The size of the combined parameters data
+        :param params_buffer: (bytes)The memory buffer of the parameters
         :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
         """
-        return self._option.set_model_buffer(model_buffer, model_buffer_size,
-                                             params_buffer, params_buffer_size,
-                                             model_format)
+        return self._option.set_model_buffer(model_buffer, params_buffer,
+                                             model_format)
 
     def use_gpu(self, device_id=0):
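
A hedged Python sketch of the trimmed-down two-argument call shown in the hunk above (file names are placeholders and the ONNX Runtime backend is chosen only for illustration):

```python
# Sketch only: load a Paddle model from memory with the updated API.
import fastdeploy as fd

with open("model.pdmodel", "rb") as f:
    model_buffer = f.read()
with open("model.pdiparams", "rb") as f:
    params_buffer = f.read()

option = fd.RuntimeOption()
# Buffer sizes are no longer passed explicitly.
option.set_model_buffer(model_buffer, params_buffer, fd.ModelFormat.PADDLE)
option.use_ort_backend()

runtime = fd.Runtime(option)
```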