From 291db315c89d2e69edeb7a8edefcef714d13bfcd Mon Sep 17 00:00:00 2001
From: huangjianhui <852142024@qq.com>
Date: Wed, 21 Dec 2022 14:21:28 +0800
Subject: [PATCH] [Other]Fastdeploy supports set_model_buffer function for
 encrypted model (#930)

* Update keypointdetection result docs
* Update im.copy() to im in examples
* Update new Api, fastdeploy::vision::Visualize to fastdeploy::vision
* Update SwapBackgroundSegmentation && SwapBackgroundMatting to SwapBackground
* Update README_CN.md
* Update README_CN.md
* Support set_model_buffer function
---
 fastdeploy/backends/paddle/paddle_backend.cc | 30 ++++++++++-----
 fastdeploy/backends/paddle/paddle_backend.h  |  6 +++
 fastdeploy/pybind/runtime.cc                 |  6 ++-
 fastdeploy/runtime.cc                        | 40 +++++++++++++++++++-
 fastdeploy/runtime.h                         | 20 ++++++++++
 python/fastdeploy/runtime.py                 | 20 +++++++++-
 6 files changed, 110 insertions(+), 12 deletions(-)
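To make the intent of the change concrete, the sketch below shows how the new RuntimeOption::SetModelBuffer interface is meant to be driven from user code once this patch is applied. It is a minimal, illustrative example rather than code from the patch: the *.enc file names and the ReadAndDecrypt() helper are hypothetical stand-ins for whatever encryption scheme protects the model, and a real deployment would plug in its own decryption step.

    #include <fstream>
    #include <sstream>
    #include <string>

    #include "fastdeploy/runtime.h"

    // Hypothetical helper: here it only reads the file from disk; a real
    // deployment would decrypt the ciphertext into plaintext model bytes.
    static std::string ReadAndDecrypt(const std::string& path) {
      std::ifstream fin(path, std::ios::binary);
      std::ostringstream buffer;
      buffer << fin.rdbuf();
      return buffer.str();
    }

    int main() {
      std::string model_buf = ReadAndDecrypt("model.pdmodel.enc");
      std::string params_buf = ReadAndDecrypt("model.pdiparams.enc");

      fastdeploy::RuntimeOption option;
      // Pass the in-memory model and params instead of file paths.
      option.SetModelBuffer(model_buf.data(), model_buf.size(),
                            params_buf.data(), params_buf.size(),
                            fastdeploy::ModelFormat::PADDLE);

      fastdeploy::Runtime runtime;
      if (!runtime.Init(option)) {
        return -1;
      }
      return 0;
    }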
diff --git a/fastdeploy/backends/paddle/paddle_backend.cc b/fastdeploy/backends/paddle/paddle_backend.cc
index 49abf02b1..67f1eb762 100644
--- a/fastdeploy/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/backends/paddle/paddle_backend.cc
@@ -99,17 +99,22 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
            << std::endl;
     return false;
   }
-  config_.SetModel(model_file, params_file);
-  config_.EnableMemoryOptim();
-  BuildOption(option);
   // The input/output information get from predictor is not right, use PaddleReader instead now
   std::string contents;
-  if (!ReadBinaryFromFile(model_file, &contents)) {
-    return false;
-  }
-  auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
+  if (option.model_from_memory_) {
+    config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_, params_file.c_str(), option.params_buffer_size_);
+    contents = model_file;
+  } else {
+    config_.SetModel(model_file, params_file);
+    if (!ReadBinaryFromFile(model_file, &contents)) {
+      return false;
+    }
+  }
+  config_.EnableMemoryOptim();
+  BuildOption(option);
+  auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
   // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to int8 mode
   if (reader.is_quantize_model) {
     if (option.use_gpu) {
@@ -167,13 +172,20 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
 #ifdef ENABLE_TRT_BACKEND
   if (option.collect_shape) {
     // Set the shape info file.
-    auto curr_model_dir = GetDirFromPath(model_file);
+    std::string curr_model_dir = "./";
+    if (!option.model_from_memory_) {
+      curr_model_dir = GetDirFromPath(model_file);
+    }
     std::string shape_range_info = PathJoin(curr_model_dir, "shape_range_info.pbtxt");
     if (!CheckFileExists(shape_range_info)) {
       FDINFO << "Start generating shape range info file."
              << std::endl;
       paddle_infer::Config analysis_config;
-      analysis_config.SetModel(model_file, params_file);
+      if (option.model_from_memory_) {
+        analysis_config.SetModelBuffer(model_file.c_str(), option.model_buffer_size_, params_file.c_str(), option.params_buffer_size_);
+      } else {
+        analysis_config.SetModel(model_file, params_file);
+      }
       analysis_config.CollectShapeRangeInfo(shape_range_info);
       auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
       std::map<std::string, std::vector<int>> max_shape;
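With the backend change above, when option.model_from_memory_ is set the model_file and params_file arguments of InitFromPaddle carry the raw bytes of the model and params files rather than paths, and those bytes are handed straight to Paddle Inference. In plain Paddle Inference terms, the in-memory path boils down to roughly the sketch below; it only restates the calls visible in the hunk above, assumes the usual public paddle_inference_api.h header, and BuildFromBuffers is an illustrative helper name, not code contained in this patch.

    #include <memory>
    #include <string>

    #include "paddle_inference_api.h"  // assumed public Paddle Inference header

    // Roughly what PaddleBackend now does for an in-memory model: hand the
    // raw bytes of the program and params files to the config instead of paths.
    std::shared_ptr<paddle_infer::Predictor> BuildFromBuffers(
        const std::string& model_buf, const std::string& params_buf) {
      paddle_infer::Config config;
      config.SetModelBuffer(model_buf.c_str(), model_buf.size(),
                            params_buf.c_str(), params_buf.size());
      config.EnableMemoryOptim();
      return paddle_infer::CreatePredictor(config);
    }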
diff --git a/fastdeploy/backends/paddle/paddle_backend.h b/fastdeploy/backends/paddle/paddle_backend.h
index 2df0c6739..1d4c34596 100644
--- a/fastdeploy/backends/paddle/paddle_backend.h
+++ b/fastdeploy/backends/paddle/paddle_backend.h
@@ -47,6 +47,12 @@ struct PaddleBackendOption {
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
 
+  std::string model_buffer_ = "";
+  std::string params_buffer_ = "";
+  size_t model_buffer_size_ = 0;
+  size_t params_buffer_size_ = 0;
+  bool model_from_memory_ = false;
+
 #ifdef WITH_GPU
   bool use_gpu = true;
 #else
diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
index d0cb0b5f8..b6fd35902 100755
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -20,6 +20,7 @@ void BindRuntime(pybind11::module& m) {
   pybind11::class_<RuntimeOption>(m, "RuntimeOption")
       .def(pybind11::init<>())
       .def("set_model_path", &RuntimeOption::SetModelPath)
+      .def("set_model_buffer", &RuntimeOption::SetModelBuffer)
       .def("use_gpu", &RuntimeOption::UseGpu)
       .def("use_cpu", &RuntimeOption::UseCpu)
       .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
@@ -67,7 +68,10 @@ void BindRuntime(pybind11::module& m) {
       .def_readwrite("params_file", &RuntimeOption::params_file)
      .def_readwrite("model_format", &RuntimeOption::model_format)
       .def_readwrite("backend", &RuntimeOption::backend)
-      .def_readwrite("backend", &RuntimeOption::external_stream_)
+      .def_readwrite("external_stream", &RuntimeOption::external_stream_)
+      .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
+      .def_readwrite("model_buffer_size", &RuntimeOption::model_buffer_size_)
+      .def_readwrite("params_buffer_size", &RuntimeOption::params_buffer_size_)
       .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
       .def_readwrite("device_id", &RuntimeOption::device_id)
       .def_readwrite("device", &RuntimeOption::device)
diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc
index 565c10607..da1033d4c 100755
--- a/fastdeploy/runtime.cc
+++ b/fastdeploy/runtime.cc
@@ -213,6 +213,31 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
   }
 }
 
+void RuntimeOption::SetModelBuffer(const char * model_buffer,
+                                   size_t model_buffer_size,
+                                   const char * params_buffer,
+                                   size_t params_buffer_size,
+                                   const ModelFormat& format) {
+  model_buffer_size_ = model_buffer_size;
+  params_buffer_size_ = params_buffer_size;
+  model_from_memory_ = true;
+  if (format == ModelFormat::PADDLE) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    params_buffer_ = std::string(params_buffer, params_buffer + params_buffer_size);
+    model_format = ModelFormat::PADDLE;
+  } else if (format == ModelFormat::ONNX) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    model_format = ModelFormat::ONNX;
+  } else if (format == ModelFormat::TORCHSCRIPT) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    model_format = ModelFormat::TORCHSCRIPT;
+  } else {
+    FDASSERT(false,
+             "The model format only can be "
+             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
+  }
+}
+
 void RuntimeOption::UseGpu(int gpu_id) {
 #ifdef WITH_GPU
   device = Device::GPU;
@@ -646,6 +671,13 @@ void Runtime::CreatePaddleBackend() {
   pd_option.cpu_thread_num = option.cpu_thread_num;
   pd_option.enable_pinned_memory = option.enable_pinned_memory;
   pd_option.external_stream_ = option.external_stream_;
+  pd_option.model_from_memory_ = option.model_from_memory_;
+  if (pd_option.model_from_memory_) {
+    pd_option.model_buffer_ = option.model_buffer_;
+    pd_option.params_buffer_ = option.params_buffer_;
+    pd_option.model_buffer_size_ = option.model_buffer_size_;
+    pd_option.params_buffer_size_ = option.params_buffer_size_;
+  }
 #ifdef ENABLE_TRT_BACKEND
   if (pd_option.use_gpu && option.pd_enable_trt) {
     pd_option.enable_trt = true;
@@ -683,9 +715,15 @@ void Runtime::CreatePaddleBackend() {
            "PaddleBackend only support model format of ModelFormat::PADDLE.");
   backend_ = utils::make_unique<PaddleBackend>();
   auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
-  FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
+  if (pd_option.model_from_memory_) {
+    FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_, option.params_buffer_,
                                           pd_option),
            "Load model from Paddle failed while initliazing PaddleBackend.");
+  } else {
+    FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
+                                            pd_option),
+             "Load model from Paddle failed while initliazing PaddleBackend.");
+  }
 #else
   FDASSERT(false, "PaddleBackend is not available, please compiled with "
                   "ENABLE_PADDLE_BACKEND=ON.");
diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h
index b0b159008..636ce93f9 100755
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -88,6 +88,20 @@ struct FASTDEPLOY_DECL RuntimeOption {
                     const std::string& params_path = "",
                     const ModelFormat& format = ModelFormat::PADDLE);
 
+  /** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
+   *
+   * \param[in] model_buffer The memory buffer of model
+   * \param[in] model_buffer_size The size of the model data
+   * \param[in] params_buffer The memory buffer of the combined parameters file
+   * \param[in] params_buffer_size The size of the combined parameters data
+   * \param[in] format Format of the loaded model
+   */
+  void SetModelBuffer(const char * model_buffer,
+                      size_t model_buffer_size,
+                      const char * params_buffer,
+                      size_t params_buffer_size,
+                      const ModelFormat& format = ModelFormat::PADDLE);
+
   /// Use cpu to inference, the runtime will inference on CPU by default
   void UseCpu();
 
@@ -431,6 +445,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string params_file = "";  // Path of parameters file, can be empty
   // format of input model
   ModelFormat model_format = ModelFormat::AUTOREC;
+
+  std::string model_buffer_ = "";
+  std::string params_buffer_ = "";
+  size_t model_buffer_size_ = 0;
+  size_t params_buffer_size_ = 0;
+  bool model_from_memory_ = false;
 };
 
 /*! @brief Runtime object used to inference the loaded model on different devices
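One property of the implementation in fastdeploy/runtime.cc above is worth spelling out next to this declaration: SetModelBuffer copies both buffers into std::string members of the option, so the caller's buffers only have to stay valid for the duration of the call. The short sketch below is illustrative only (ConfigureFromPlaintext is a hypothetical helper, not part of this patch) and shows a caller scrubbing its decrypted plaintext immediately after configuring the option.

    #include <algorithm>
    #include <string>

    #include "fastdeploy/runtime.h"

    void ConfigureFromPlaintext(fastdeploy::RuntimeOption* option,
                                std::string model_buf, std::string params_buf) {
      // SetModelBuffer copies both buffers into the option, so the plaintext
      // passed in here does not need to outlive this call.
      option->SetModelBuffer(model_buf.data(), model_buf.size(),
                             params_buf.data(), params_buf.size(),
                             fastdeploy::ModelFormat::PADDLE);
      // Illustration only: scrub the local plaintext copies right away.
      std::fill(model_buf.begin(), model_buf.end(), '\0');
      std::fill(params_buf.begin(), params_buf.end(), '\0');
    }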
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index f9334efbf..010c77489 100755
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -222,11 +222,29 @@ class RuntimeOption:
 
         :param model_path: (str)Path of model file
         :param params_path: (str)Path of parameters file
-        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX
+        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
         """
         return self._option.set_model_path(model_path, params_path,
                                            model_format)
 
+    def set_model_buffer(self,
+                         model_buffer,
+                         model_buffer_size,
+                         params_buffer,
+                         params_buffer_size,
+                         model_format=ModelFormat.PADDLE):
+        """Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
+
+        :param model_buffer: (bytes)The memory buffer of model
+        :param model_buffer_size: (unsigned int)The size of the model data.
+        :param params_buffer: (bytes)The memory buffer of the combined parameters file
+        :param params_buffer_size: (unsigned inst)The size of the combined parameters data
+        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
+        """
+        return self._option.set_model_buffer(model_buffer, model_buffer_size,
+                                             params_buffer, params_buffer_size,
+                                             model_format)
+
     def use_gpu(self, device_id=0):
         """Inference with Nvidia GPU