[Other] Optimize load model from memory function (#1205)

Optimize option for runtime
2025-10-06 00:57:33 +08:00 · 2023-02-01 15:50:38 +08:00
parent 29e93fa2dc
commit b4e322af63
5 changed files with 29 additions and 49 deletions
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -259,8 +259,8 @@ std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption& runtime_option,
    clone_option.external_stream_ = stream;
    if (runtime_option.model_from_memory_) {
      FDASSERT(
-          casted_backend->InitFromPaddle(runtime_option.model_buffer_,
+          casted_backend->InitFromPaddle(runtime_option.model_file,
-                                         runtime_option.params_buffer_,
+                                         runtime_option.params_file,
                                         clone_option),
          "Clone model from Paddle failed while initialize PaddleBackend.");
    } else {
--- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
+++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
@@ -731,8 +731,8 @@ std::unique_ptr<BaseBackend> TrtBackend::Clone(RuntimeOption& runtime_option,
    clone_option.gpu_id = device_id;
    clone_option.external_stream_ = stream;
    if (runtime_option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromPaddle(runtime_option.model_buffer_,
+      FDASSERT(casted_backend->InitFromPaddle(runtime_option.model_file,
-                                              runtime_option.params_buffer_,
+                                              runtime_option.params_file,
                                              clone_option),
               "Clone model from Paddle failed while initialize TrtBackend.");
    } else {
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -210,10 +210,10 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
 void Runtime::ReleaseModelMemoryBuffer() {
  if (option.model_from_memory_) {
-    option.model_buffer_.clear();
+    option.model_file.clear();
-    option.model_buffer_.shrink_to_fit();
+    option.model_file.shrink_to_fit();
-    option.params_buffer_.clear();
+    option.params_file.clear();
-    option.params_buffer_.shrink_to_fit();
+    option.params_file.shrink_to_fit();
  }
 }
@@ -276,8 +276,8 @@ void Runtime::CreatePaddleBackend() {
  backend_ = utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
  if (pd_option.model_from_memory_) {
-    FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
+    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                            option.params_buffer_, pd_option),
+                                            option.params_file, pd_option),
             "Load model from Paddle failed while initliazing PaddleBackend.");
    ReleaseModelMemoryBuffer();
  } else {
@@ -360,7 +360,7 @@ void Runtime::CreateOrtBackend() {
  auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
  if (option.model_format == ModelFormat::ONNX) {
    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, ort_option),
+      FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
               "Load model from ONNX failed while initliazing OrtBackend.");
      ReleaseModelMemoryBuffer();
    } else {
@@ -372,8 +372,8 @@ void Runtime::CreateOrtBackend() {
    }
  } else {
    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromPaddle(
+      FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                   option.model_buffer_, option.params_buffer_, ort_option),
+                                              option.params_file, ort_option),
               "Load model from Paddle failed while initliazing OrtBackend.");
      ReleaseModelMemoryBuffer();
    } else {
@@ -424,7 +424,7 @@ void Runtime::CreateTrtBackend() {
  auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
  if (option.model_format == ModelFormat::ONNX) {
    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromOnnx(option.model_buffer_, trt_option),
+      FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
               "Load model from ONNX failed while initliazing TrtBackend.");
      ReleaseModelMemoryBuffer();
    } else {
@@ -436,8 +436,8 @@ void Runtime::CreateTrtBackend() {
    }
  } else {
    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromPaddle(
+      FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                   option.model_buffer_, option.params_buffer_, trt_option),
+                                              option.params_file, trt_option),
               "Load model from Paddle failed while initliazing TrtBackend.");
      ReleaseModelMemoryBuffer();
    } else {
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -21,39 +21,19 @@ namespace fastdeploy {
 void RuntimeOption::SetModelPath(const std::string& model_path,
                                 const std::string& params_path,
                                 const ModelFormat& format) {
  if (format == ModelFormat::PADDLE) {
  model_file = model_path;
  params_file = params_path;
-    model_format = ModelFormat::PADDLE;
+  model_format = format;
-  } else if (format == ModelFormat::ONNX) {
+  model_from_memory_ = false;
    model_file = model_path;
    model_format = ModelFormat::ONNX;
  } else if (format == ModelFormat::TORCHSCRIPT) {
    model_file = model_path;
    model_format = ModelFormat::TORCHSCRIPT;
  } else {
    FDASSERT(false,
             "The model format only can be "
             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
  }
 }
 void RuntimeOption::SetModelBuffer(const std::string& model_buffer,
                                   const std::string& params_buffer,
                                   const ModelFormat& format) {
  model_file = model_buffer;
  params_file = params_buffer;
  model_format = format;
  model_from_memory_ = true;
  if (format == ModelFormat::PADDLE) {
    model_buffer_ = model_buffer;
    params_buffer_ = params_buffer;
    model_format = ModelFormat::PADDLE;
  } else if (format == ModelFormat::ONNX) {
    model_buffer_ = model_buffer;
    model_format = ModelFormat::ONNX;
  } else {
    FDASSERT(false,
             "The model format only can be "
             "ModelFormat::PADDLE/ModelFormat::ONNX.");
  }
 }
 void RuntimeOption::UseGpu(int gpu_id) {
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -422,14 +422,14 @@ struct FASTDEPLOY_DECL RuntimeOption {
  /// Option to configure Paddle Lite backend
  LiteBackendOption paddle_lite_option;
-  std::string model_file = "";   // Path of model file
+  // If model_from_memory_ is true, model_file and params_file hold the
-  std::string params_file = "";  // Path of parameters file, can be empty
+  // model and parameter contents as in-memory binary buffers;
  // otherwise, model_file and params_file are the paths of the files.
  std::string model_file = "";
  std::string params_file = "";
  bool model_from_memory_ = false;
  // format of input model
  ModelFormat model_format = ModelFormat::PADDLE;
  std::string model_buffer_ = "";
  std::string params_buffer_ = "";
  bool model_from_memory_ = false;
 };
 }  // namespace fastdeploy