[Other] FastDeploy supports set_model_buffer function for encrypted models (#930)

* Update keypointdetection result docs

* Update im.copy() to im in examples

* Update to the new API: fastdeploy::vision::Visualize to fastdeploy::vision

* Update SwapBackgroundSegmentation && SwapBackgroundMatting to SwapBackground

* Update README_CN.md

* Update README_CN.md

* Support set_model_buffer function

Author: huangjianhui
Date: 2022-12-21 14:21:28 +08:00 (committed by GitHub)
Parent: b42ec302e6
Commit: 291db315c8
6 changed files with 110 additions and 12 deletions
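
Since the diff below only shows the plumbing, here is a minimal C++ usage sketch of the new API for the encrypted-model case. It assumes the existing FastDeploy calls Runtime::Init(), RuntimeOption::UseCpu() and RuntimeOption::UsePaddleBackend(); ReadAndDecrypt() is a hypothetical placeholder for the caller's own decryption routine and is not part of FastDeploy.

#include <fstream>
#include <iterator>
#include <string>

#include "fastdeploy/runtime.h"

// Hypothetical placeholder for the user's decryption step: here it simply
// reads the file bytes as-is; a real deployment would decrypt the ciphertext
// into the returned buffer so the plaintext model never touches the disk.
static std::string ReadAndDecrypt(const std::string& path) {
  std::ifstream file(path, std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(file),
                     std::istreambuf_iterator<char>());
}

int main() {
  std::string model_buf = ReadAndDecrypt("model.pdmodel.enc");
  std::string params_buf = ReadAndDecrypt("model.pdiparams.enc");

  fastdeploy::RuntimeOption option;
  // Hand the in-memory buffers to the runtime instead of file paths.
  option.SetModelBuffer(model_buf.data(), model_buf.size(),
                        params_buf.data(), params_buf.size(),
                        fastdeploy::ModelFormat::PADDLE);
  option.UseCpu();
  option.UsePaddleBackend();  // this PR wires the buffer path into the Paddle backend

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  // runtime.Infer(...) as usual from here on.
  return 0;
}

The same flow is exposed to Python as RuntimeOption.set_model_buffer(), bound in the pybind and runtime.py changes further down in this diff.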


@@ -99,17 +99,22 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
             << std::endl;
    return false;
  }
  config_.SetModel(model_file, params_file);
  config_.EnableMemoryOptim();
  BuildOption(option);
  // The input/output information got from the predictor is not right, use PaddleReader instead now
  std::string contents;
  if (option.model_from_memory_) {
    config_.SetModelBuffer(model_file.c_str(), option.model_buffer_size_,
                           params_file.c_str(), option.params_buffer_size_);
    contents = model_file;
  } else {
    config_.SetModel(model_file, params_file);
    if (!ReadBinaryFromFile(model_file, &contents)) {
      return false;
    }
  }
  config_.EnableMemoryOptim();
  BuildOption(option);
  auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
  // If it's a quantized model, and use cpu with mkldnn, automatically switch to int8 mode
  if (reader.is_quantize_model) {
    if (option.use_gpu) {
@@ -167,13 +172,20 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
#ifdef ENABLE_TRT_BACKEND
  if (option.collect_shape) {
    // Set the shape info file.
    auto curr_model_dir = GetDirFromPath(model_file);
    std::string curr_model_dir = "./";
    if (!option.model_from_memory_) {
      curr_model_dir = GetDirFromPath(model_file);
    }
    std::string shape_range_info =
        PathJoin(curr_model_dir, "shape_range_info.pbtxt");
    if (!CheckFileExists(shape_range_info)) {
      FDINFO << "Start generating shape range info file." << std::endl;
      paddle_infer::Config analysis_config;
      if (option.model_from_memory_) {
        analysis_config.SetModelBuffer(
            model_file.c_str(), option.model_buffer_size_,
            params_file.c_str(), option.params_buffer_size_);
      } else {
        analysis_config.SetModel(model_file, params_file);
      }
      analysis_config.CollectShapeRangeInfo(shape_range_info);
      auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
      std::map<std::string, std::vector<int>> max_shape;


@@ -47,6 +47,12 @@ struct PaddleBackendOption {
  std::string model_file = "";   // Path of model file
  std::string params_file = "";  // Path of parameters file, can be empty
  std::string model_buffer_ = "";
  std::string params_buffer_ = "";
  size_t model_buffer_size_ = 0;
  size_t params_buffer_size_ = 0;
  bool model_from_memory_ = false;
#ifdef WITH_GPU
  bool use_gpu = true;
#else


@@ -20,6 +20,7 @@ void BindRuntime(pybind11::module& m) {
  pybind11::class_<RuntimeOption>(m, "RuntimeOption")
      .def(pybind11::init())
      .def("set_model_path", &RuntimeOption::SetModelPath)
      .def("set_model_buffer", &RuntimeOption::SetModelBuffer)
      .def("use_gpu", &RuntimeOption::UseGpu)
      .def("use_cpu", &RuntimeOption::UseCpu)
      .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
@@ -67,7 +68,10 @@ void BindRuntime(pybind11::module& m) {
      .def_readwrite("params_file", &RuntimeOption::params_file)
      .def_readwrite("model_format", &RuntimeOption::model_format)
      .def_readwrite("backend", &RuntimeOption::backend)
      .def_readwrite("backend", &RuntimeOption::external_stream_)
      .def_readwrite("external_stream", &RuntimeOption::external_stream_)
      .def_readwrite("model_from_memory", &RuntimeOption::model_from_memory_)
      .def_readwrite("model_buffer_size", &RuntimeOption::model_buffer_size_)
      .def_readwrite("params_buffer_size", &RuntimeOption::params_buffer_size_)
      .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
      .def_readwrite("device_id", &RuntimeOption::device_id)
      .def_readwrite("device", &RuntimeOption::device)


@@ -213,6 +213,31 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
  }
}

void RuntimeOption::SetModelBuffer(const char* model_buffer,
                                   size_t model_buffer_size,
                                   const char* params_buffer,
                                   size_t params_buffer_size,
                                   const ModelFormat& format) {
  model_buffer_size_ = model_buffer_size;
  params_buffer_size_ = params_buffer_size;
  model_from_memory_ = true;
  if (format == ModelFormat::PADDLE) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    params_buffer_ = std::string(params_buffer, params_buffer + params_buffer_size);
    model_format = ModelFormat::PADDLE;
  } else if (format == ModelFormat::ONNX) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::ONNX;
  } else if (format == ModelFormat::TORCHSCRIPT) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::TORCHSCRIPT;
  } else {
    FDASSERT(false,
             "The model format only can be "
             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
  }
}

void RuntimeOption::UseGpu(int gpu_id) {
#ifdef WITH_GPU
  device = Device::GPU;
@@ -646,6 +671,13 @@ void Runtime::CreatePaddleBackend() {
  pd_option.cpu_thread_num = option.cpu_thread_num;
  pd_option.enable_pinned_memory = option.enable_pinned_memory;
  pd_option.external_stream_ = option.external_stream_;
  pd_option.model_from_memory_ = option.model_from_memory_;
  if (pd_option.model_from_memory_) {
    pd_option.model_buffer_ = option.model_buffer_;
    pd_option.params_buffer_ = option.params_buffer_;
    pd_option.model_buffer_size_ = option.model_buffer_size_;
    pd_option.params_buffer_size_ = option.params_buffer_size_;
  }
#ifdef ENABLE_TRT_BACKEND
  if (pd_option.use_gpu && option.pd_enable_trt) {
    pd_option.enable_trt = true;
@@ -683,9 +715,15 @@ void Runtime::CreatePaddleBackend() {
"PaddleBackend only support model format of ModelFormat::PADDLE.");
  backend_ = utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
  if (pd_option.model_from_memory_) {
    FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
                                            option.params_buffer_, pd_option),
             "Load model from Paddle failed while initializing PaddleBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, pd_option),
             "Load model from Paddle failed while initializing PaddleBackend.");
  }
#else
  FDASSERT(false,
           "PaddleBackend is not available, please compile with "
           "ENABLE_PADDLE_BACKEND=ON.");


@@ -88,6 +88,20 @@ struct FASTDEPLOY_DECL RuntimeOption {
                    const std::string& params_path = "",
                    const ModelFormat& format = ModelFormat::PADDLE);

  /** \brief Specify the memory buffers of the model and parameters. Used when the model and params are loaded directly from memory
   *
   * \param[in] model_buffer The memory buffer of the model
   * \param[in] model_buffer_size The size of the model data
   * \param[in] params_buffer The memory buffer of the combined parameters file
   * \param[in] params_buffer_size The size of the combined parameters data
   * \param[in] format Format of the loaded model
   */
  void SetModelBuffer(const char* model_buffer,
                      size_t model_buffer_size,
                      const char* params_buffer,
                      size_t params_buffer_size,
                      const ModelFormat& format = ModelFormat::PADDLE);

  /// Use cpu to inference, the runtime will inference on CPU by default
  void UseCpu();
@@ -431,6 +445,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
  std::string params_file = "";  // Path of parameters file, can be empty
  // format of input model
  ModelFormat model_format = ModelFormat::AUTOREC;
  std::string model_buffer_ = "";
  std::string params_buffer_ = "";
  size_t model_buffer_size_ = 0;
  size_t params_buffer_size_ = 0;
  bool model_from_memory_ = false;
};

/*! @brief Runtime object used to inference the loaded model on different devices


@@ -222,11 +222,29 @@ class RuntimeOption:
        :param model_path: (str)Path of model file
        :param params_path: (str)Path of parameters file
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
        """
        return self._option.set_model_path(model_path, params_path,
                                           model_format)

    def set_model_buffer(self,
                         model_buffer,
                         model_buffer_size,
                         params_buffer,
                         params_buffer_size,
                         model_format=ModelFormat.PADDLE):
        """Specify the memory buffers of the model and parameters. Used when the model and params are loaded directly from memory
        :param model_buffer: (bytes)The memory buffer of the model
        :param model_buffer_size: (unsigned int)The size of the model data
        :param params_buffer: (bytes)The memory buffer of the combined parameters file
        :param params_buffer_size: (unsigned int)The size of the combined parameters data
        :param model_format: (ModelFormat)Format of model, support ModelFormat.PADDLE/ModelFormat.ONNX/ModelFormat.TORCHSCRIPT
        """
        return self._option.set_model_buffer(model_buffer, model_buffer_size,
                                             params_buffer, params_buffer_size,
                                             model_format)

    def use_gpu(self, device_id=0):
        """Inference with Nvidia GPU