[Other] FastDeploy supports set_model_buffer function for encrypted models (#930)

* Update keypointdetection result docs

* Update im.copy() to im in examples

* Update to the new API: fastdeploy::vision::Visualize to fastdeploy::vision

* Update SwapBackgroundSegmentation && SwapBackgroundMatting to SwapBackground

* Update README_CN.md

* Update README_CN.md

* Support set_model_buffer function

Author: huangjianhui
Date: 2022-12-21 14:21:28 +08:00 (committed by GitHub)
Parent: b42ec302e6
Commit: 291db315c8
6 changed files with 110 additions and 12 deletions
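
Since the diff below only shows the plumbing, here is a minimal C++ usage sketch of the new API for the encrypted-model case. It assumes the existing FastDeploy calls Runtime::Init(), RuntimeOption::UseCpu() and RuntimeOption::UsePaddleBackend(); ReadAndDecrypt() is a hypothetical placeholder for the caller's own decryption routine and is not part of FastDeploy.

#include <fstream>
#include <iterator>
#include <string>

#include "fastdeploy/runtime.h"

// Hypothetical placeholder for the user's decryption step: here it simply
// reads the file bytes as-is; a real deployment would decrypt the ciphertext
// into the returned buffer so the plaintext model never touches the disk.
static std::string ReadAndDecrypt(const std::string& path) {
  std::ifstream file(path, std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(file),
                     std::istreambuf_iterator<char>());
}

int main() {
  std::string model_buf = ReadAndDecrypt("model.pdmodel.enc");
  std::string params_buf = ReadAndDecrypt("model.pdiparams.enc");

  fastdeploy::RuntimeOption option;
  // Hand the in-memory buffers to the runtime instead of file paths.
  option.SetModelBuffer(model_buf.data(), model_buf.size(),
                        params_buf.data(), params_buf.size(),
                        fastdeploy::ModelFormat::PADDLE);
  option.UseCpu();
  option.UsePaddleBackend();  // this PR wires the buffer path into the Paddle backend

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  // runtime.Infer(...) as usual from here on.
  return 0;
}

The same flow is exposed to Python as RuntimeOption.set_model_buffer(), bound in the pybind and runtime.py changes further down in this diff.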


@@ -99,17 +99,22 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
             << std::endl;
    return false;
  }
  config_.SetModel(model_file, params_file);
  config_.EnableMemoryOptim();
  BuildOption(option);
  // The input/output information got from the predictor is not right, use PaddleReader instead now
  std::string contents;
  if (option.model_from_memory_) {
    config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_,
                           params_file.c_str(), option.params_buffer_size_);
    contents = model_file;
  } else {
    config_.SetModel(model_file, params_file);
    if (!ReadBinaryFromFile(model_file, &contents)) {
      return false;
    }
  }
  config_.EnableMemoryOptim();
  BuildOption(option);
  auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
  // If it's a quantized model, and use cpu with mkldnn, automatically switch to int8 mode
  if (reader.is_quantize_model) {
    if (option.use_gpu) {
@@ -167,13 +172,20 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
#ifdef ENABLE_TRT_BACKEND
  if (option.collect_shape) {
    // Set the shape info file.
    auto curr_model_dir = GetDirFromPath(model_file);
    std::string curr_model_dir = "./";
    if (!option.model_from_memory_) {
      curr_model_dir = GetDirFromPath(model_file);
    }
    std::string shape_range_info =
        PathJoin(curr_model_dir, "shape_range_info.pbtxt");
    if (!CheckFileExists(shape_range_info)) {
      FDINFO << "Start generating shape range info file." << std::endl;
      paddle_infer::Config analysis_config;
      if (option.model_from_memory_) {
        analysis_config.SetModelBuffer(
            model_file.c_str(), option.model_buffer_size_,
            params_file.c_str(), option.params_buffer_size_);
      } else {
        analysis_config.SetModel(model_file, params_file);
      }
      analysis_config.CollectShapeRangeInfo(shape_range_info);
      auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
      std::map<std::string, std::vector<int>> max_shape;


@@ -47,6 +47,12 @@ struct PaddleBackendOption {
  std::string model_file = "";   // Path of model file
  std::string params_file = "";  // Path of parameters file, can be empty
  std::string model_buffer_ = "";
  std::string params_buffer_ = "";
  size_t model_buffer_size_ = 0;
  size_t params_buffer_size_ = 0;
  bool model_from_memory_ = false;
#ifdef WITH_GPU
  bool use_gpu = true;
#else


@@ -20,6 +20,7 @@ void BindRuntime(pybind11::module& m) {
  pybind11::class_<RuntimeOption>(m, "RuntimeOption")
      .def(pybind11::init())
      .def("set_model_path", &RuntimeOption::SetModelPath)
      .def("set_model_buffer", &RuntimeOption::SetModelBuffer)
      .def("use_gpu", &RuntimeOption::UseGpu)
      .def("use_cpu", &RuntimeOption::UseCpu)
      .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
@@ -67,7 +68,10 @@ void BindRuntime(pybind11::module& m) {
      .def_readwrite("params_file", &RuntimeOption::params_file)
      .def_readwrite("model_format", &RuntimeOption::model_format)
      .def_readwrite("backend", &RuntimeOption::backend)
      .def_readwrite("backend", &RuntimeOption::external_stream_)
      .def_readwrite("external_stream", &RuntimeOption::external_stream_)
      .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
      .def_readwrite("model_buffer_size", &RuntimeOption::model_buffer_size_)
      .def_readwrite("params_buffer_size", &RuntimeOption::params_buffer_size_)
      .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
      .def_readwrite("device_id", &RuntimeOption::device_id)
      .def_readwrite("device", &RuntimeOption::device)


@@ -213,6 +213,31 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
  }
}

void RuntimeOption::SetModelBuffer(const char* model_buffer,
                                   size_t model_buffer_size,
                                   const char* params_buffer,
                                   size_t params_buffer_size,
                                   const ModelFormat& format) {
  model_buffer_size_ = model_buffer_size;
  params_buffer_size_ = params_buffer_size;
  model_from_memory_ = true;
  if (format == ModelFormat::PADDLE) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    params_buffer_ = std::string(params_buffer, params_buffer + params_buffer_size);
    model_format = ModelFormat::PADDLE;
  } else if (format == ModelFormat::ONNX) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::ONNX;
  } else if (format == ModelFormat::TORCHSCRIPT) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::TORCHSCRIPT;
  } else {
    FDASSERT(false,
             "The model format only can be "
             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
  }
}

void RuntimeOption::UseGpu(int gpu_id) {
#ifdef WITH_GPU
  device = Device::GPU;
@@ -646,6 +671,13 @@ void Runtime::CreatePaddleBackend() {
  pd_option.cpu_thread_num = option.cpu_thread_num;
  pd_option.enable_pinned_memory = option.enable_pinned_memory;
  pd_option.external_stream_ = option.external_stream_;
  pd_option.model_from_memory_ = option.model_from_memory_;
  if (pd_option.model_from_memory_) {
    pd_option.model_buffer_ = option.model_buffer_;
    pd_option.params_buffer_ = option.params_buffer_;
    pd_option.model_buffer_size_ = option.model_buffer_size_;
    pd_option.params_buffer_size_ = option.params_buffer_size_;
  }
#ifdef ENABLE_TRT_BACKEND
  if (pd_option.use_gpu && option.pd_enable_trt) {
    pd_option.enable_trt = true;
@@ -683,9 +715,15 @@ void Runtime::CreatePaddleBackend() {
"PaddleBackend only support model format of ModelFormat::PADDLE.");
  backend_ = utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
  if (pd_option.model_from_memory_) {
    FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
                                            option.params_buffer_, pd_option),
             "Load model from Paddle failed while initializing PaddleBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, pd_option),
             "Load model from Paddle failed while initializing PaddleBackend.");
  }
#else
  FDASSERT(false,
           "PaddleBackend is not available, please compile with "
           "ENABLE_PADDLE_BACKEND=ON.");


@@ -88,6 +88,20 @@ struct FASTDEPLOY_DECL RuntimeOption {
                    const std::string& params_path = "",
                    const ModelFormat& format = ModelFormat::PADDLE);

  /** \brief Specify the memory buffers of the model and parameters. Used when the model and params are loaded directly from memory
   *
   * \param[in] model_buffer The memory buffer of the model
   * \param[in] model_buffer_size The size of the model data
   * \param[in] params_buffer The memory buffer of the combined parameters file
   * \param[in] params_buffer_size The size of the combined parameters data
   * \param[in] format Format of the loaded model
   */
  void SetModelBuffer(const char* model_buffer,
                      size_t model_buffer_size,
                      const char* params_buffer,
                      size_t params_buffer_size,
                      const ModelFormat& format = ModelFormat::PADDLE);

  /// Use cpu to inference, the runtime will inference on CPU by default
  void UseCpu();
@@ -431,6 +445,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
  std::string params_file = "";  // Path of parameters file, can be empty
  // format of input model
  ModelFormat model_format = ModelFormat::AUTOREC;
  std::string model_buffer_ = "";
  std::string params_buffer_ = "";
  size_t model_buffer_size_ = 0;
  size_t params_buffer_size_ = 0;
  bool model_from_memory_ = false;
};

/*! @brief Runtime object used to inference the loaded model on different devices


@@ -222,11 +222,29 @@ class RuntimeOption:
        :param model_path: (str)Path of model file
        :param params_path: (str)Path of parameters file
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
        """
        return self._option.set_model_path(model_path, params_path,
                                           model_format)

    def set_model_buffer(self,
                         model_buffer,
                         model_buffer_size,
                         params_buffer,
                         params_buffer_size,
                         model_format=ModelFormat.PADDLE):
        """Specify the memory buffers of the model and parameters. Used when the model and params are loaded directly from memory
        :param model_buffer: (bytes)The memory buffer of the model
        :param model_buffer_size: (unsigned int)The size of the model data
        :param params_buffer: (bytes)The memory buffer of the combined parameters file
        :param params_buffer_size: (unsigned int)The size of the combined parameters data
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
        """
        return self._option.set_model_buffer(model_buffer, model_buffer_size,
                                             params_buffer, params_buffer_size,
                                             model_format)

    def use_gpu(self, device_id=0):
        """Inference with Nvidia GPU