[Other] FastDeploy TensorRT && ONNX backends support loading models from memory (#1130)

* Update all backends to load model from buffer

* Delete redundant code

* Format code style

* Format code style

* Delete redundant code

* Delete redundant code

* Add some FDASSERTs

* Update loading model from memory when cloning engine

* Update clone engine code

* Update set_model_buffer API parameters to char pointer

* Release memory buffer variables after backend initialization finishes

* Fix conflict

* Fix bug
huangjianhui authored on 2023-02-01 11:36:09 +08:00, committed by GitHub
parent 5b7728e898 · commit 76df90afc3
17 changed files with 201 additions and 154 deletions
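For context, a minimal usage sketch (not part of this commit) of the in-memory loading flow described by the commit message above. The file paths and the helper for reading files into strings are placeholders; the calls are hedged against the signatures shown in the diffs below.

#include "fastdeploy/runtime.h"

#include <fstream>
#include <sstream>
#include <string>

// Placeholder helper: read a whole file into a std::string buffer.
static std::string ReadFileToString(const std::string& path) {
  std::ifstream fin(path, std::ios::binary);
  std::ostringstream ss;
  ss << fin.rdbuf();
  return ss.str();
}

int main() {
  // Placeholder paths for an exported Paddle inference model.
  std::string model_buffer = ReadFileToString("model.pdmodel");
  std::string params_buffer = ReadFileToString("model.pdiparams");

  fastdeploy::RuntimeOption option;
  // Updated signature: buffers are passed as std::string, so the explicit
  // *_buffer_size parameters are no longer needed.
  option.SetModelBuffer(model_buffer, params_buffer,
                        fastdeploy::ModelFormat::PADDLE);

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}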


@@ -84,8 +84,6 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("backend", &RuntimeOption::backend)
.def_readwrite("external_stream", &RuntimeOption::external_stream_)
.def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
.def_readwrite("model_buffer_size", &RuntimeOption::model_buffer_size_)
.def_readwrite("params_buffer_size", &RuntimeOption::params_buffer_size_)
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
.def_readwrite("device_id", &RuntimeOption::device_id)
.def_readwrite("device", &RuntimeOption::device)


@@ -21,6 +21,7 @@
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/runtime/runtime_option.h"
namespace fastdeploy {
@@ -76,7 +77,8 @@ class BaseBackend {
// Optional: For those backends which can share memory
// while creating multiple inference engines with same model file
virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void *stream = nullptr,
int device_id = -1) {
FDERROR << "Clone no support" << std::endl;
return nullptr;

fastdeploy/runtime/backends/openvino/ov_backend.cc (Executable file → Normal file)

@@ -237,7 +237,6 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
option_ = option;
std::shared_ptr<ov::Model> model = core_.read_model(model_file);
if (option_.shape_infos.size() > 0) {
std::map<std::string, ov::PartialShape> shape_infos;
for (const auto& item : option_.shape_infos) {
@@ -380,8 +379,8 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
return true;
}
std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void* stream,
int device_id) {
std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(
RuntimeOption& runtime_option, void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend =
utils::make_unique<OpenVINOBackend>();
auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());


@@ -52,7 +52,8 @@ class OpenVINOBackend : public BaseBackend {
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void* stream = nullptr,
int device_id = -1) override;
private:


@@ -73,8 +73,8 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
}
}
bool OrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const OrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
@@ -92,7 +92,8 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
strcpy(ops[1].op_name, "pool2d");
strcpy(ops[1].export_op_name, "AdaptivePool2d");
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(), params_buffer.size(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, ops.data(), 2,
"onnxruntime", nullptr, 0, "", &save_external)) {
@@ -112,9 +113,8 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
model_file_name.c_str());
f << onnx_model_proto;
f.close();
return InitFromOnnx(model_file_name, option, false);
}
return InitFromOnnx(onnx_model_proto, option, true);
return InitFromOnnx(onnx_model_proto, option);
#else
FDERROR << "Didn't compile with PaddlePaddle Frontend, you can try to "
"call `InitFromOnnx` instead."
@@ -124,8 +124,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
}
bool OrtBackend::InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option,
bool from_memory_buffer) {
const OrtBackendOption& option) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
@@ -134,17 +133,7 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
BuildOption(option);
InitCustomOperators();
if (from_memory_buffer) {
session_ = {env_, model_file.data(), model_file.size(), session_options_};
} else {
#ifdef _WIN32
session_ = {env_,
std::wstring(model_file.begin(), model_file.end()).c_str(),
session_options_};
#else
session_ = {env_, model_file.c_str(), session_options_};
#endif
}
binding_ = std::make_shared<Ort::IoBinding>(session_);
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);


@@ -39,14 +39,13 @@ class OrtBackend : public BaseBackend {
void BuildOption(const OrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const OrtBackendOption& option = OrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option = OrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromOnnx(const std::string& model_buffer,
const OrtBackendOption& option = OrtBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;


@@ -39,10 +39,7 @@ struct PaddleBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
// load model and paramters from memory
bool model_from_memory_ = false;
#ifdef WITH_GPU


@@ -89,9 +89,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
}
}
bool PaddleBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const PaddleBackendOption& option) {
// bool PaddleBackend::InitFromPaddle(const std::string& contents) {
if (initialized_) {
FDERROR << "PaddleBackend is already initlized, cannot initialize again."
<< std::endl;
@@ -102,16 +103,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
// PaddleReader instead now
std::string contents;
if (option.model_from_memory_) {
config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_,
params_file.c_str(), option.params_buffer_size_);
contents = model_file;
} else {
config_.SetModel(model_file, params_file);
if (!ReadBinaryFromFile(model_file, &contents)) {
return false;
}
}
config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(), params_buffer.size());
contents = model_buffer;
config_.EnableMemoryOptim();
BuildOption(option);
auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
@@ -172,20 +166,16 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
// Set the shape info file.
std::string curr_model_dir = "./";
if (!option.model_from_memory_) {
curr_model_dir = GetDirFromPath(model_file);
curr_model_dir = GetDirFromPath(option.model_file);
}
std::string shape_range_info =
PathJoin(curr_model_dir, "shape_range_info.pbtxt");
if (!CheckFileExists(shape_range_info)) {
FDINFO << "Start generating shape range info file." << std::endl;
paddle_infer::Config analysis_config;
if (option.model_from_memory_) {
analysis_config.SetModelBuffer(
model_file.c_str(), option.model_buffer_size_, params_file.c_str(),
option.params_buffer_size_);
} else {
analysis_config.SetModel(model_file, params_file);
}
analysis_config.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(),
params_buffer.size());
analysis_config.CollectShapeRangeInfo(shape_range_info);
auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
std::map<std::string, std::vector<int>> max_shape;
@@ -258,7 +248,8 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
return true;
}
std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption& runtime_option,
void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend =
utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
@@ -266,8 +257,27 @@ std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
casted_backend->InitFromPaddle(clone_option.model_file,
clone_option.params_file, clone_option);
if (runtime_option.model_from_memory_) {
FDASSERT(
casted_backend->InitFromPaddle(runtime_option.model_buffer_,
runtime_option.params_buffer_,
clone_option),
"Clone model from Paddle failed while initialize PaddleBackend.");
} else {
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(
ReadBinaryFromFile(clone_option.model_file, &model_buffer),
"Fail to read binary from model file while cloning PaddleBackend");
FDASSERT(ReadBinaryFromFile(clone_option.params_file, &params_buffer),
"Fail to read binary from parameter file while cloning "
"PaddleBackend");
FDASSERT(
casted_backend->InitFromPaddle(model_buffer, params_buffer,
clone_option),
"Clone model from Paddle failed while initialize PaddleBackend.");
}
FDWARNING << "The target device id:" << device_id
<< " is different from current device id:" << option_.gpu_id
<< ", cannot share memory with current engine." << std::endl;


@@ -53,8 +53,8 @@ class PaddleBackend : public BaseBackend {
virtual ~PaddleBackend() = default;
void BuildOption(const PaddleBackendOption& option);
bool
InitFromPaddle(const std::string& model_file, const std::string& params_file,
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const PaddleBackendOption& option = PaddleBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
@@ -64,7 +64,8 @@ class PaddleBackend : public BaseBackend {
int NumOutputs() const override { return outputs_desc_.size(); }
std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void* stream = nullptr,
int device_id = -1) override;
TensorInfo GetInputInfo(int index) override;


@@ -24,6 +24,7 @@ namespace fastdeploy {
struct TrtBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
// format of input model
ModelFormat model_format = ModelFormat::AUTOREC;


@@ -113,8 +113,8 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
return true;
}
bool TrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool TrtBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const TrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
@@ -132,7 +132,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
int model_content_size = 0;
char* calibration_cache_ptr;
int calibration_cache_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(), params_buffer.size(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, ops.data(), 1, "tensorrt",
&calibration_cache_ptr, &calibration_cache_size, "",
@@ -141,7 +142,6 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
<< std::endl;
return false;
}
std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
delete[] model_content_ptr;
@@ -159,9 +159,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
model_file_name_.c_str());
f << onnx_model_proto;
f.close();
return InitFromOnnx(model_file_name_, option, false);
}
return InitFromOnnx(onnx_model_proto, option, true);
return InitFromOnnx(onnx_model_proto, option);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
@@ -170,9 +169,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
#endif
}
bool TrtBackend::InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option,
bool from_memory_buffer) {
bool TrtBackend::InitFromOnnx(const std::string& model_buffer,
const TrtBackendOption& option) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
@@ -181,22 +179,7 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
option_ = option;
cudaSetDevice(option_.gpu_id);
std::string onnx_content = "";
if (!from_memory_buffer) {
std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
onnx_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(onnx_content.at(0)), onnx_content.size());
fin.close();
} else {
onnx_content = model_file;
}
std::string onnx_content = model_buffer;
// This part of code will record the original outputs order
// because the converted tensorrt network may exist wrong order of outputs
@@ -739,21 +722,41 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
return infos;
}
std::unique_ptr<BaseBackend> TrtBackend::Clone(void* stream, int device_id) {
std::unique_ptr<BaseBackend> TrtBackend::Clone(RuntimeOption& runtime_option,
void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
if (device_id > 0 && device_id != option_.gpu_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
if (runtime_option.model_from_memory_) {
FDASSERT(casted_backend->InitFromPaddle(runtime_option.model_buffer_,
runtime_option.params_buffer_,
clone_option),
"Clone model from Paddle failed while initialize TrtBackend.");
} else {
if (option_.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
std::string model_buffer = "";
FDASSERT(
ReadBinaryFromFile(clone_option.model_file, &model_buffer),
"Fail to read binary from model file while cloning TrtBackend");
FDASSERT(casted_backend->InitFromOnnx(model_buffer, clone_option),
"Clone model from ONNX failed while initialize TrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(
option_.model_file, option_.params_file, clone_option),
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(
ReadBinaryFromFile(clone_option.model_file, &model_buffer),
"Fail to read binary from model file while cloning TrtBackend");
FDASSERT(
ReadBinaryFromFile(clone_option.params_file, &params_buffer),
"Fail to read binary from parameter file while cloning TrtBackend");
FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
clone_option),
"Clone model from Paddle failed while initialize TrtBackend.");
}
}
FDWARNING << "The target device id:" << device_id
<< " is different from current device id:" << option_.gpu_id
<< ", cannot share memory with current engine." << std::endl;


@@ -72,13 +72,12 @@ class TrtBackend : public BaseBackend {
TrtBackend() : engine_(nullptr), context_(nullptr) {}
void BuildOption(const TrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const TrtBackendOption& option = TrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option = TrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromOnnx(const std::string& model_buffer,
const TrtBackendOption& option = TrtBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;
@@ -88,7 +87,8 @@ class TrtBackend : public BaseBackend {
TensorInfo GetOutputInfo(int index);
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void* stream = nullptr,
int device_id = -1) override;
~TrtBackend() {


@@ -208,6 +208,15 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
return nullptr;
}
void Runtime::ReleaseModelMemoryBuffer() {
if (option.model_from_memory_) {
option.model_buffer_.clear();
option.model_buffer_.shrink_to_fit();
option.params_buffer_.clear();
option.params_buffer_.shrink_to_fit();
}
}
void Runtime::CreatePaddleBackend() {
FDASSERT(
option.device == Device::CPU || option.device == Device::GPU ||
@@ -231,12 +240,6 @@ void Runtime::CreatePaddleBackend() {
pd_option.enable_pinned_memory = option.enable_pinned_memory;
pd_option.external_stream_ = option.external_stream_;
pd_option.model_from_memory_ = option.model_from_memory_;
if (pd_option.model_from_memory_) {
pd_option.model_buffer_ = option.model_buffer_;
pd_option.params_buffer_ = option.params_buffer_;
pd_option.model_buffer_size_ = option.model_buffer_size_;
pd_option.params_buffer_size_ = option.params_buffer_size_;
}
#ifdef ENABLE_TRT_BACKEND
if (pd_option.use_gpu && option.pd_enable_trt) {
pd_option.enable_trt = true;
@@ -276,9 +279,16 @@ void Runtime::CreatePaddleBackend() {
FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
option.params_buffer_, pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
ReleaseModelMemoryBuffer();
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, pd_option),
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
"Fail to read binary from parameter file");
FDASSERT(
casted_backend->InitFromPaddle(model_buffer, params_buffer, pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
}
#else
@@ -291,6 +301,10 @@ void Runtime::CreatePaddleBackend() {
}
void Runtime::CreateOpenVINOBackend() {
// TODO(huangjianhui) OpenVINO only supports to load ONNX format model from
// memory Temporarily disable this function
FDASSERT(option.model_from_memory_ == false,
"OpenVINOBackend don't support to load model from memory");
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
@@ -342,16 +356,37 @@ void Runtime::CreateOrtBackend() {
ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
ort_option.gpu_id = option.device_id;
ort_option.external_stream_ = option.external_stream_;
backend_ = utils::make_unique<OrtBackend>();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
ReleaseModelMemoryBuffer();
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, ort_option),
std::string model_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(casted_backend->InitFromOnnx(model_buffer, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
}
} else {
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromPaddle(
option.model_buffer_, option.params_buffer_, ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
ReleaseModelMemoryBuffer();
} else {
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
"Fail to read binary from parameter file");
FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
}
}
#else
FDASSERT(false,
@@ -385,16 +420,37 @@ void Runtime::CreateTrtBackend() {
trt_option.serialize_file = option.trt_serialize_file;
trt_option.enable_pinned_memory = option.enable_pinned_memory;
trt_option.external_stream_ = option.external_stream_;
backend_ = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
ReleaseModelMemoryBuffer();
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, trt_option),
std::string model_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(casted_backend->InitFromOnnx(model_buffer, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
}
} else {
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromPaddle(
option.model_buffer_, option.params_buffer_, trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
ReleaseModelMemoryBuffer();
} else {
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
"Fail to read binary from parameter file");
FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
}
}
#else
FDASSERT(false,
@@ -406,6 +462,9 @@ void Runtime::CreateTrtBackend() {
}
void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
FDASSERT(option.model_from_memory_ == false,
"LiteBackend don't support to load model from memory");
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
option.device == Device::KUNLUNXIN ||
option.device == Device::ASCEND,
@@ -413,7 +472,6 @@ void Runtime::CreateLiteBackend() {
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE");
#ifdef ENABLE_LITE_BACKEND
backend_ = utils::make_unique<LiteBackend>();
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -429,6 +487,8 @@ void Runtime::CreateLiteBackend() {
}
void Runtime::CreateRKNPU2Backend() {
FDASSERT(option.model_from_memory_ == false,
"RKNPU2Backend don't support to load model from memory");
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
FDASSERT(option.model_format == ModelFormat::RKNN,
@@ -451,11 +511,14 @@ void Runtime::CreateRKNPU2Backend() {
}
void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
FDASSERT(option.model_from_memory_ == false,
"SophgoBackend don't support to load model from memory");
FDASSERT(option.device == Device::SOPHGOTPUD,
"Backend::SOPHGO only supports Device::SOPHGO");
FDASSERT(option.model_format == ModelFormat::SOPHGO,
"SophgoBackend only support model format of ModelFormat::SOPHGO");
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
backend_ = utils::make_unique<SophgoBackend>();
auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
@@ -486,7 +549,7 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
FDINFO << "Runtime Clone with Backend:: " << option.backend << " in "
<< option.device << "." << std::endl;
runtime->option = option;
runtime->backend_ = backend_->Clone(stream, device_id);
runtime->backend_ = backend_->Clone(option, stream, device_id);
return runtime;
}


@@ -83,6 +83,8 @@ struct FASTDEPLOY_DECL Runtime {
*/
Runtime* Clone(void* stream = nullptr, int device_id = -1);
void ReleaseModelMemoryBuffer();
RuntimeOption option;
/** \brief Compile TorchScript Module, only for Poros backend

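A hedged fragment (not from this commit) of how the cloning path might be exercised after the change to Clone; the option setup and device id are illustrative, and the buffers are assumed to have been set via SetModelBuffer as in the sketch near the top.

// Illustrative only: Runtime::Clone keeps its public signature; internally the
// backend's Clone now receives the RuntimeOption so the cloned engine can be
// re-initialized from the in-memory buffers when model_from_memory_ is set.
fastdeploy::Runtime runtime;
runtime.Init(option);  // `option` prepared with SetModelBuffer(...) as above
std::unique_ptr<fastdeploy::Runtime> cloned(
    runtime.Clone(/*stream=*/nullptr, /*device_id=*/1));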

@@ -38,29 +38,21 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
}
}
void RuntimeOption::SetModelBuffer(const char* model_buffer,
size_t model_buffer_size,
const char* params_buffer,
size_t params_buffer_size,
void RuntimeOption::SetModelBuffer(const std::string& model_buffer,
const std::string& params_buffer,
const ModelFormat& format) {
model_buffer_size_ = model_buffer_size;
params_buffer_size_ = params_buffer_size;
model_from_memory_ = true;
if (format == ModelFormat::PADDLE) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
params_buffer_ =
std::string(params_buffer, params_buffer + params_buffer_size);
model_buffer_ = model_buffer;
params_buffer_ = params_buffer;
model_format = ModelFormat::PADDLE;
} else if (format == ModelFormat::ONNX) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
model_buffer_ = model_buffer;
model_format = ModelFormat::ONNX;
} else if (format == ModelFormat::TORCHSCRIPT) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
model_format = ModelFormat::TORCHSCRIPT;
} else {
FDASSERT(false,
"The model format only can be "
"ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
"ModelFormat::PADDLE/ModelFormat::ONNX.");
}
}


@@ -50,14 +50,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
*
* \param[in] model_buffer The memory buffer of model
* \param[in] model_buffer_size The size of the model data
* \param[in] params_buffer The memory buffer of the combined parameters file
* \param[in] params_buffer_size The size of the combined parameters data
* \param[in] model_buffer The string of model memory buffer
* \param[in] params_buffer The string of parameters memory buffer
* \param[in] format Format of the loaded model
*/
void SetModelBuffer(const char* model_buffer, size_t model_buffer_size,
const char* params_buffer, size_t params_buffer_size,
void SetModelBuffer(const std::string& model_buffer,
const std::string& params_buffer = "",
const ModelFormat& format = ModelFormat::PADDLE);
/// Use cpu to inference, the runtime will inference on CPU by default
@@ -431,8 +429,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
bool model_from_memory_ = false;
};


@@ -229,20 +229,14 @@ class RuntimeOption:
def set_model_buffer(self,
model_buffer,
model_buffer_size,
params_buffer,
params_buffer_size,
params_buffer="",
model_format=ModelFormat.PADDLE):
"""Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
:param model_buffer: (bytes)The memory buffer of model
:param model_buffer_size: (unsigned int)The size of the model data.
:param params_buffer: (bytes)The memory buffer of the combined parameters file
:param params_buffer_size: (unsigned inst)The size of the combined parameters data
:param params_buffer: (bytes)The memory buffer of the parameters
:param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
"""
return self._option.set_model_buffer(model_buffer, model_buffer_size,
params_buffer, params_buffer_size,
return self._option.set_model_buffer(model_buffer, params_buffer,
model_format)
def use_gpu(self, device_id=0):