Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-21 15:49:31 +08:00
[Other] FastDeploy TensorRT && ONNX backend support to load model from memory (#1130)
* Update all backends to load model from buffer
* Delete redundant code
* Format code style
* Format code style
* Delete redundant code
* Delete redundant code
* Add some FDASSERTs
* Update load model from memory when cloning engine
* Update clone engine code
* Update set_model_buffer API parameters with char pointer
* Release memory buffer variables after finishing backend init
* Fix conflict
* Fix bug
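For orientation before the diff: a minimal sketch of how the reworked API is meant to be used from C++ once this change is in place. The file names, the backend choice, and the small read helper are illustrative assumptions, not part of the commit itself.

#include <fstream>
#include <sstream>
#include <string>

#include "fastdeploy/runtime.h"

// Illustrative helper: slurp a file into a std::string buffer.
static std::string ReadFileToString(const std::string& path) {
  std::ifstream fin(path, std::ios::binary);
  std::ostringstream ss;
  ss << fin.rdbuf();
  return ss.str();
}

int main() {
  // Hypothetical Paddle model/parameter files.
  std::string model_buffer = ReadFileToString("model.pdmodel");
  std::string params_buffer = ReadFileToString("model.pdiparams");

  fastdeploy::RuntimeOption option;
  // After this commit the buffers are plain std::string, no explicit sizes.
  option.SetModelBuffer(model_buffer, params_buffer,
                        fastdeploy::ModelFormat::PADDLE);
  option.UseOrtBackend();  // ONNX Runtime and TensorRT backends accept memory buffers.

  fastdeploy::Runtime runtime;
  return runtime.Init(option) ? 0 : 1;
}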
@@ -84,8 +84,6 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("backend", &RuntimeOption::backend)
.def_readwrite("external_stream", &RuntimeOption::external_stream_)
.def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
.def_readwrite("model_buffer_size", &RuntimeOption::model_buffer_size_)
.def_readwrite("params_buffer_size", &RuntimeOption::params_buffer_size_)
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
.def_readwrite("device_id", &RuntimeOption::device_id)
.def_readwrite("device", &RuntimeOption::device)
@@ -21,6 +21,7 @@
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/runtime/runtime_option.h"

namespace fastdeploy {
@@ -76,7 +77,8 @@ class BaseBackend {
// Optional: For those backends which can share memory
// while creating multiple inference engines with same model file
virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void *stream = nullptr,
int device_id = -1) {
FDERROR << "Clone no support" << std::endl;
return nullptr;
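Clone now receives the RuntimeOption of the runtime being copied, so a backend can rebuild itself from the in-memory buffers instead of assuming a model path. A self-contained sketch of that pattern with stand-in types (DemoOption/DemoBackend are illustrative, not the FastDeploy classes):

#include <iostream>
#include <memory>
#include <string>

// Stand-in for RuntimeOption: carries either a path or an in-memory buffer.
struct DemoOption {
  bool model_from_memory = false;
  std::string model_buffer;
  std::string model_file;
};

class DemoBackend {
 public:
  virtual ~DemoBackend() = default;
  bool InitFromBuffer(const std::string& buffer) {
    loaded_bytes_ = buffer.size();
    return true;
  }
  // The option parameter lets the clone re-initialize from memory.
  virtual std::unique_ptr<DemoBackend> Clone(DemoOption& option,
                                             void* stream = nullptr,
                                             int device_id = -1) {
    auto copy = std::make_unique<DemoBackend>();
    if (option.model_from_memory) {
      copy->InitFromBuffer(option.model_buffer);  // reuse the buffer directly
    } else {
      copy->InitFromBuffer(option.model_file);    // real code would re-read the file
    }
    return copy;
  }
  size_t loaded_bytes_ = 0;
};

int main() {
  DemoOption option;
  option.model_from_memory = true;
  option.model_buffer.assign(16, 'x');
  DemoBackend backend;
  auto copy = backend.Clone(option, nullptr, 0);
  std::cout << copy->loaded_bytes_ << std::endl;  // prints 16
}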
fastdeploy/runtime/backends/openvino/ov_backend.cc (5 changes; executable file → normal file)
@@ -237,7 +237,6 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
option_ = option;

std::shared_ptr<ov::Model> model = core_.read_model(model_file);

if (option_.shape_infos.size() > 0) {
std::map<std::string, ov::PartialShape> shape_infos;
for (const auto& item : option_.shape_infos) {
@@ -380,8 +379,8 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
return true;
}

std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void* stream,
int device_id) {
std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(
RuntimeOption& runtime_option, void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend =
utils::make_unique<OpenVINOBackend>();
auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());
@@ -52,7 +52,8 @@ class OpenVINOBackend : public BaseBackend {
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;

std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void* stream = nullptr,
int device_id = -1) override;

private:
@@ -73,8 +73,8 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
}
}

bool OrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const OrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
@@ -92,7 +92,8 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
strcpy(ops[1].op_name, "pool2d");
strcpy(ops[1].export_op_name, "AdaptivePool2d");

if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(), params_buffer.size(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, ops.data(), 2,
"onnxruntime", nullptr, 0, "", &save_external)) {
@@ -112,9 +113,8 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
model_file_name.c_str());
f << onnx_model_proto;
f.close();
return InitFromOnnx(model_file_name, option, false);
}
return InitFromOnnx(onnx_model_proto, option, true);
return InitFromOnnx(onnx_model_proto, option);
#else
FDERROR << "Didn't compile with PaddlePaddle Frontend, you can try to "
"call `InitFromOnnx` instead."
@@ -124,8 +124,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
}

bool OrtBackend::InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option,
bool from_memory_buffer) {
const OrtBackendOption& option) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
@@ -134,17 +133,7 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
BuildOption(option);
InitCustomOperators();
if (from_memory_buffer) {
session_ = {env_, model_file.data(), model_file.size(), session_options_};
} else {
#ifdef _WIN32
session_ = {env_,
std::wstring(model_file.begin(), model_file.end()).c_str(),
session_options_};
#else
session_ = {env_, model_file.c_str(), session_options_};
#endif
}
binding_ = std::make_shared<Ort::IoBinding>(session_);

Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
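With the from_memory_buffer flag gone, OrtBackend::InitFromOnnx always receives the serialized ONNX model as a string and builds the session from memory. A hedged, standalone sketch of that session-from-memory call using the ONNX Runtime C++ API (the model path is illustrative):

#include <fstream>
#include <sstream>
#include <string>

#include <onnxruntime_cxx_api.h>

int main() {
  // Read an ONNX model into memory; any .onnx file works here.
  std::ifstream fin("model.onnx", std::ios::binary);
  std::ostringstream ss;
  ss << fin.rdbuf();
  const std::string model_buffer = ss.str();

  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "demo");
  Ort::SessionOptions session_options;
  // ONNX Runtime can construct a session directly from in-memory bytes,
  // which is what the simplified InitFromOnnx path relies on.
  Ort::Session session(env, model_buffer.data(), model_buffer.size(),
                       session_options);
  return 0;
}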
@@ -39,14 +39,13 @@ class OrtBackend : public BaseBackend {
void BuildOption(const OrtBackendOption& option);

bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const OrtBackendOption& option = OrtBackendOption(),
bool verbose = false);

bool InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option = OrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromOnnx(const std::string& model_buffer,
const OrtBackendOption& option = OrtBackendOption());

bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;
@@ -39,10 +39,7 @@ struct PaddleBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty

std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
// load model and paramters from memory
bool model_from_memory_ = false;

#ifdef WITH_GPU
@@ -89,9 +89,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
}
}

bool PaddleBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const PaddleBackendOption& option) {
// bool PaddleBackend::InitFromPaddle(const std::string& contents) {
if (initialized_) {
FDERROR << "PaddleBackend is already initlized, cannot initialize again."
<< std::endl;
@@ -102,16 +103,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
// PaddleReader instead now
std::string contents;

if (option.model_from_memory_) {
config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_,
params_file.c_str(), option.params_buffer_size_);
contents = model_file;
} else {
config_.SetModel(model_file, params_file);
if (!ReadBinaryFromFile(model_file, &contents)) {
return false;
}
}
config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(), params_buffer.size());
contents = model_buffer;
config_.EnableMemoryOptim();
BuildOption(option);
auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
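The branch on option.model_from_memory_ disappears because the caller now always hands PaddleBackend two buffers; underneath, Paddle Inference still wants raw pointers plus sizes. A hedged sketch of that underlying call (SetModelBuffer/EnableMemoryOptim are the public Paddle Inference API; the surrounding helper is illustrative):

#include <string>

#include "paddle_inference_api.h"  // paddle_infer::Config

// Assumes model_buffer/params_buffer already hold the serialized program
// and weights, e.g. read from disk or received over the network.
void ConfigureFromMemory(paddle_infer::Config* config,
                         const std::string& model_buffer,
                         const std::string& params_buffer) {
  config->SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
                         params_buffer.c_str(), params_buffer.size());
  config->EnableMemoryOptim();
}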
@@ -172,20 +166,16 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
// Set the shape info file.
std::string curr_model_dir = "./";
if (!option.model_from_memory_) {
curr_model_dir = GetDirFromPath(model_file);
curr_model_dir = GetDirFromPath(option.model_file);
}
std::string shape_range_info =
PathJoin(curr_model_dir, "shape_range_info.pbtxt");
if (!CheckFileExists(shape_range_info)) {
FDINFO << "Start generating shape range info file." << std::endl;
paddle_infer::Config analysis_config;
if (option.model_from_memory_) {
analysis_config.SetModelBuffer(
model_file.c_str(), option.model_buffer_size_, params_file.c_str(),
option.params_buffer_size_);
} else {
analysis_config.SetModel(model_file, params_file);
}
analysis_config.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(),
params_buffer.size());
analysis_config.CollectShapeRangeInfo(shape_range_info);
auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
std::map<std::string, std::vector<int>> max_shape;
@@ -258,7 +248,8 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
return true;
}

std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption& runtime_option,
void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend =
utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
@@ -266,8 +257,27 @@ std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
casted_backend->InitFromPaddle(clone_option.model_file,
clone_option.params_file, clone_option);
if (runtime_option.model_from_memory_) {
FDASSERT(
casted_backend->InitFromPaddle(runtime_option.model_buffer_,
runtime_option.params_buffer_,
clone_option),
"Clone model from Paddle failed while initialize PaddleBackend.");
} else {
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(
ReadBinaryFromFile(clone_option.model_file, &model_buffer),
"Fail to read binary from model file while cloning PaddleBackend");
FDASSERT(ReadBinaryFromFile(clone_option.params_file, &params_buffer),
"Fail to read binary from parameter file while cloning "
"PaddleBackend");
FDASSERT(
casted_backend->InitFromPaddle(model_buffer, params_buffer,
clone_option),
"Clone model from Paddle failed while initialize PaddleBackend.");
}

FDWARNING << "The target device id:" << device_id
<< " is different from current device id:" << option_.gpu_id
<< ", cannot share memory with current engine." << std::endl;
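The file-based branch above leans on FastDeploy's ReadBinaryFromFile utility. Roughly, such a helper amounts to the following (an illustrative equivalent, not the library's own source):

#include <fstream>
#include <string>

// Returns false if the file cannot be opened; otherwise fills *out with
// the raw bytes of the file.
bool ReadBinaryFromFileSketch(const std::string& path, std::string* out) {
  std::ifstream fin(path, std::ios::binary | std::ios::in);
  if (!fin) {
    return false;
  }
  fin.seekg(0, std::ios::end);
  out->resize(static_cast<size_t>(fin.tellg()));
  fin.seekg(0, std::ios::beg);
  fin.read(&(*out)[0], static_cast<std::streamsize>(out->size()));
  return true;
}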
@@ -53,8 +53,8 @@ class PaddleBackend : public BaseBackend {
virtual ~PaddleBackend() = default;
void BuildOption(const PaddleBackendOption& option);

bool
InitFromPaddle(const std::string& model_file, const std::string& params_file,
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const PaddleBackendOption& option = PaddleBackendOption());

bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
@@ -64,7 +64,8 @@ class PaddleBackend : public BaseBackend {
int NumOutputs() const override { return outputs_desc_.size(); }

std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void* stream = nullptr,
int device_id = -1) override;

TensorInfo GetInputInfo(int index) override;
@@ -24,6 +24,7 @@ namespace fastdeploy {
struct TrtBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty

// format of input model
ModelFormat model_format = ModelFormat::AUTOREC;
@@ -113,8 +113,8 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
return true;
}

bool TrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool TrtBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const TrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
@@ -132,7 +132,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
int model_content_size = 0;
char* calibration_cache_ptr;
int calibration_cache_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(), params_buffer.size(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, ops.data(), 1, "tensorrt",
&calibration_cache_ptr, &calibration_cache_size, "",
@@ -141,7 +142,6 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
<< std::endl;
return false;
}

std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
delete[] model_content_ptr;
@@ -159,9 +159,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
model_file_name_.c_str());
f << onnx_model_proto;
f.close();
return InitFromOnnx(model_file_name_, option, false);
}
return InitFromOnnx(onnx_model_proto, option, true);
return InitFromOnnx(onnx_model_proto, option);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
@@ -170,9 +169,8 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
#endif
}

bool TrtBackend::InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option,
bool from_memory_buffer) {
bool TrtBackend::InitFromOnnx(const std::string& model_buffer,
const TrtBackendOption& option) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
@@ -181,22 +179,7 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
option_ = option;
cudaSetDevice(option_.gpu_id);

std::string onnx_content = "";
if (!from_memory_buffer) {
std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
onnx_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(onnx_content.at(0)), onnx_content.size());
fin.close();
} else {
onnx_content = model_file;
}
std::string onnx_content = model_buffer;

// This part of code will record the original outputs order
// because the converted tensorrt network may exist wrong order of outputs
@@ -739,21 +722,41 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
return infos;
}

std::unique_ptr<BaseBackend> TrtBackend::Clone(void* stream, int device_id) {
std::unique_ptr<BaseBackend> TrtBackend::Clone(RuntimeOption& runtime_option,
void* stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
if (device_id > 0 && device_id != option_.gpu_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
if (runtime_option.model_from_memory_) {
FDASSERT(casted_backend->InitFromPaddle(runtime_option.model_buffer_,
runtime_option.params_buffer_,
clone_option),
"Clone model from Paddle failed while initialize TrtBackend.");
} else {
if (option_.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
std::string model_buffer = "";
FDASSERT(
ReadBinaryFromFile(clone_option.model_file, &model_buffer),
"Fail to read binary from model file while cloning TrtBackend");
FDASSERT(casted_backend->InitFromOnnx(model_buffer, clone_option),
"Clone model from ONNX failed while initialize TrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(
option_.model_file, option_.params_file, clone_option),
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(
ReadBinaryFromFile(clone_option.model_file, &model_buffer),
"Fail to read binary from model file while cloning TrtBackend");
FDASSERT(
ReadBinaryFromFile(clone_option.params_file, &params_buffer),
"Fail to read binary from parameter file while cloning TrtBackend");
FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
clone_option),
"Clone model from Paddle failed while initialize TrtBackend.");
}
}
FDWARNING << "The target device id:" << device_id
<< " is different from current device id:" << option_.gpu_id
<< ", cannot share memory with current engine." << std::endl;
@@ -72,13 +72,12 @@ class TrtBackend : public BaseBackend {
TrtBackend() : engine_(nullptr), context_(nullptr) {}
void BuildOption(const TrtBackendOption& option);

bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const TrtBackendOption& option = TrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option = TrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromOnnx(const std::string& model_buffer,
const TrtBackendOption& option = TrtBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;
@@ -88,7 +87,8 @@ class TrtBackend : public BaseBackend {
TensorInfo GetOutputInfo(int index);
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void* stream = nullptr,
int device_id = -1) override;

~TrtBackend() {
@@ -208,6 +208,15 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
return nullptr;
}

void Runtime::ReleaseModelMemoryBuffer() {
if (option.model_from_memory_) {
option.model_buffer_.clear();
option.model_buffer_.shrink_to_fit();
option.params_buffer_.clear();
option.params_buffer_.shrink_to_fit();
}
}

void Runtime::CreatePaddleBackend() {
FDASSERT(
option.device == Device::CPU || option.device == Device::GPU ||
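Runtime::ReleaseModelMemoryBuffer (added above) drops the buffer copies held by RuntimeOption once a backend has consumed them. clear() only resets the size; the shrink_to_fit() call is what asks the allocator to give the storage back (a non-binding request in the standard, but effective in practice). A tiny demonstration:

#include <iostream>
#include <string>

int main() {
  std::string buffer(8 * 1024 * 1024, 'x');  // stand-in for a model buffer
  std::cout << "capacity before: " << buffer.capacity() << std::endl;

  buffer.clear();          // size -> 0, capacity typically unchanged
  buffer.shrink_to_fit();  // request that the unused capacity be released
  std::cout << "capacity after:  " << buffer.capacity() << std::endl;
}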
@@ -231,12 +240,6 @@ void Runtime::CreatePaddleBackend() {
pd_option.enable_pinned_memory = option.enable_pinned_memory;
pd_option.external_stream_ = option.external_stream_;
pd_option.model_from_memory_ = option.model_from_memory_;
if (pd_option.model_from_memory_) {
pd_option.model_buffer_ = option.model_buffer_;
pd_option.params_buffer_ = option.params_buffer_;
pd_option.model_buffer_size_ = option.model_buffer_size_;
pd_option.params_buffer_size_ = option.params_buffer_size_;
}
#ifdef ENABLE_TRT_BACKEND
if (pd_option.use_gpu && option.pd_enable_trt) {
pd_option.enable_trt = true;
@@ -276,9 +279,16 @@ void Runtime::CreatePaddleBackend() {
FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
option.params_buffer_, pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
ReleaseModelMemoryBuffer();
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, pd_option),
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
"Fail to read binary from parameter file");
FDASSERT(
casted_backend->InitFromPaddle(model_buffer, params_buffer, pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
}
#else
@@ -291,6 +301,10 @@ void Runtime::CreatePaddleBackend() {
}

void Runtime::CreateOpenVINOBackend() {
// TODO(huangjianhui) OpenVINO only supports to load ONNX format model from
// memory Temporarily disable this function
FDASSERT(option.model_from_memory_ == false,
"OpenVINOBackend don't support to load model from memory");
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
@@ -342,16 +356,37 @@ void Runtime::CreateOrtBackend() {
ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
ort_option.gpu_id = option.device_id;
ort_option.external_stream_ = option.external_stream_;

backend_ = utils::make_unique<OrtBackend>();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
ReleaseModelMemoryBuffer();
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, ort_option),
std::string model_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(casted_backend->InitFromOnnx(model_buffer, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
}
} else {
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromPaddle(
option.model_buffer_, option.params_buffer_, ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
ReleaseModelMemoryBuffer();
} else {
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
"Fail to read binary from parameter file");
FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
}
}
#else
FDASSERT(false,
@@ -385,16 +420,37 @@ void Runtime::CreateTrtBackend() {
trt_option.serialize_file = option.trt_serialize_file;
trt_option.enable_pinned_memory = option.enable_pinned_memory;
trt_option.external_stream_ = option.external_stream_;

backend_ = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
ReleaseModelMemoryBuffer();
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, trt_option),
std::string model_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(casted_backend->InitFromOnnx(model_buffer, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
}
} else {
if (option.model_from_memory_) {
FDASSERT(casted_backend->InitFromPaddle(
option.model_buffer_, option.params_buffer_, trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
ReleaseModelMemoryBuffer();
} else {
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Fail to read binary from model file");
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
"Fail to read binary from parameter file");
FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
}
}
#else
FDASSERT(false,
@@ -406,6 +462,9 @@ void Runtime::CreateTrtBackend() {
}

void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
FDASSERT(option.model_from_memory_ == false,
"LiteBackend don't support to load model from memory");
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
option.device == Device::KUNLUNXIN ||
option.device == Device::ASCEND,
@@ -413,7 +472,6 @@ void Runtime::CreateLiteBackend() {
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE");
#ifdef ENABLE_LITE_BACKEND
backend_ = utils::make_unique<LiteBackend>();
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -429,6 +487,8 @@ void Runtime::CreateLiteBackend() {
}

void Runtime::CreateRKNPU2Backend() {
FDASSERT(option.model_from_memory_ == false,
"RKNPU2Backend don't support to load model from memory");
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
FDASSERT(option.model_format == ModelFormat::RKNN,
@@ -451,11 +511,14 @@ void Runtime::CreateRKNPU2Backend() {
}

void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
FDASSERT(option.model_from_memory_ == false,
"SophgoBackend don't support to load model from memory");
FDASSERT(option.device == Device::SOPHGOTPUD,
"Backend::SOPHGO only supports Device::SOPHGO");
FDASSERT(option.model_format == ModelFormat::SOPHGO,
"SophgoBackend only support model format of ModelFormat::SOPHGO");
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
backend_ = utils::make_unique<SophgoBackend>();
auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
@@ -486,7 +549,7 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
FDINFO << "Runtime Clone with Backend:: " << option.backend << " in "
<< option.device << "." << std::endl;
runtime->option = option;
runtime->backend_ = backend_->Clone(stream, device_id);
runtime->backend_ = backend_->Clone(option, stream, device_id);
return runtime;
}
@@ -83,6 +83,8 @@ struct FASTDEPLOY_DECL Runtime {
*/
Runtime* Clone(void* stream = nullptr, int device_id = -1);

void ReleaseModelMemoryBuffer();

RuntimeOption option;

/** \brief Compile TorchScript Module, only for Poros backend
@@ -38,29 +38,21 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
}
}

void RuntimeOption::SetModelBuffer(const char* model_buffer,
size_t model_buffer_size,
const char* params_buffer,
size_t params_buffer_size,
void RuntimeOption::SetModelBuffer(const std::string& model_buffer,
const std::string& params_buffer,
const ModelFormat& format) {
model_buffer_size_ = model_buffer_size;
params_buffer_size_ = params_buffer_size;
model_from_memory_ = true;
if (format == ModelFormat::PADDLE) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
params_buffer_ =
std::string(params_buffer, params_buffer + params_buffer_size);
model_buffer_ = model_buffer;
params_buffer_ = params_buffer;
model_format = ModelFormat::PADDLE;
} else if (format == ModelFormat::ONNX) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
model_buffer_ = model_buffer;
model_format = ModelFormat::ONNX;
} else if (format == ModelFormat::TORCHSCRIPT) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
model_format = ModelFormat::TORCHSCRIPT;
} else {
FDASSERT(false,
"The model format only can be "
"ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
"ModelFormat::PADDLE/ModelFormat::ONNX.");
}
}
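For callers, the migration is mechanical: the pointer-plus-length overload shown above is replaced by the string overload. A hedged before/after sketch (the wrapper function is illustrative):

#include <string>

#include "fastdeploy/runtime.h"

void SetBuffers(fastdeploy::RuntimeOption* option,
                const std::string& model_buffer,
                const std::string& params_buffer) {
  // Old API (removed by this commit):
  // option->SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
  //                        params_buffer.c_str(), params_buffer.size(),
  //                        fastdeploy::ModelFormat::PADDLE);

  // New API: std::string carries both the bytes and their length.
  option->SetModelBuffer(model_buffer, params_buffer,
                         fastdeploy::ModelFormat::PADDLE);
}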
@@ -50,14 +50,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
*
* \param[in] model_buffer The memory buffer of model
* \param[in] model_buffer_size The size of the model data
* \param[in] params_buffer The memory buffer of the combined parameters file
* \param[in] params_buffer_size The size of the combined parameters data
* \param[in] model_buffer The string of model memory buffer
* \param[in] params_buffer The string of parameters memory buffer
* \param[in] format Format of the loaded model
*/
void SetModelBuffer(const char* model_buffer, size_t model_buffer_size,
const char* params_buffer, size_t params_buffer_size,
void SetModelBuffer(const std::string& model_buffer,
const std::string& params_buffer = "",
const ModelFormat& format = ModelFormat::PADDLE);

/// Use cpu to inference, the runtime will inference on CPU by default
@@ -431,8 +429,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
bool model_from_memory_ = false;
};
@@ -229,20 +229,14 @@ class RuntimeOption:
    def set_model_buffer(self,
                         model_buffer,
                         model_buffer_size,
                         params_buffer,
                         params_buffer_size,
                         params_buffer="",
                         model_format=ModelFormat.PADDLE):
        """Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory

        :param model_buffer: (bytes)The memory buffer of model
        :param model_buffer_size: (unsigned int)The size of the model data.
        :param params_buffer: (bytes)The memory buffer of the combined parameters file
        :param params_buffer_size: (unsigned inst)The size of the combined parameters data
        :param params_buffer: (bytes)The memory buffer of the parameters
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
        """
        return self._option.set_model_buffer(model_buffer, model_buffer_size,
                                             params_buffer, params_buffer_size,
        return self._option.set_model_buffer(model_buffer, params_buffer,
                                             model_format)

    def use_gpu(self, device_id=0):