Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-19 23:16:15 +08:00
[Backend & Serving] Serving and Runtime support Clone (#464)
* Add Clone support to Serving and Runtime
* Support the TRT, OpenVINO and Paddle backends

Co-authored-by: Jason <jiangjiajun@baidu.com>
@@ -102,19 +102,6 @@ std::string Str(const Backend& b) {
```cpp
  return "UNKNOWN-Backend";
}

std::string Str(const ModelFormat& f) {
  if (f == ModelFormat::PADDLE) {
    return "ModelFormat::PADDLE";
  } else if (f == ModelFormat::ONNX) {
    return "ModelFormat::ONNX";
  } else if (f == ModelFormat::RKNN) {
    return "ModelFormat::RKNN";
  } else if (f == ModelFormat::TORCHSCRIPT) {
    return "ModelFormat::TORCHSCRIPT";
  }
  return "UNKNOWN-ModelFormat";
}

std::ostream& operator<<(std::ostream& out, const Backend& backend) {
  if (backend == Backend::ORT) {
    out << "Backend::ORT";
```
@@ -135,20 +122,6 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
```cpp
  return out;
}

std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
  if (format == ModelFormat::PADDLE) {
    out << "ModelFormat::PADDLE";
  } else if (format == ModelFormat::ONNX) {
    out << "ModelFormat::ONNX";
  } else if (format == ModelFormat::RKNN) {
    out << "ModelFormat::RKNN";
  } else if (format == ModelFormat::TORCHSCRIPT) {
    out << "ModelFormat::TORCHSCRIPT";
  } else {
    out << "UNKNOWN-ModelFormat";
  }
  return out;
}

bool CheckModelFormat(const std::string& model_file,
                      const ModelFormat& model_format) {
  if (model_format == ModelFormat::PADDLE) {
```
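As context for the helpers above, here is a minimal sketch of how the Str() and operator<< overloads are typically consumed; the fastdeploy/runtime.h include path is an assumption:

```cpp
#include <iostream>
#include "fastdeploy/runtime.h"  // assumed header exposing Backend/ModelFormat

int main() {
  // operator<< (defined above) streams the enum's symbolic name.
  std::cout << fastdeploy::ModelFormat::ONNX << std::endl;  // ModelFormat::ONNX
  // Str() returns the same name as a std::string.
  std::cout << fastdeploy::Str(fastdeploy::Backend::ORT) << std::endl;
  return 0;
}
```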
@@ -411,6 +384,10 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
```cpp
  trt_serialize_file = cache_file_path;
}

void RuntimeOption::SetOpenVINOStreams(int num_streams) {
  ov_num_streams = num_streams;
}

bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
```
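SetOpenVINOStreams() is one of the new knobs this commit threads through to the backend. A minimal usage sketch; SetModelPath(), UseOpenVINOBackend() and Init() are assumed from the existing RuntimeOption/Runtime API:

```cpp
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // assumed setter
  option.UseOpenVINOBackend();   // assumed backend selector
  option.SetOpenVINOStreams(4);  // new in this commit: forwarded to ov_num_streams

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;  // Init() assumed from the existing API
  return 0;
}
```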
@@ -582,6 +559,8 @@ bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
```cpp
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  auto pd_option = PaddleBackendOption();
  pd_option.model_file = option.model_file;
  pd_option.params_file = option.params_file;
  pd_option.enable_mkldnn = option.pd_enable_mkldnn;
  pd_option.enable_log_info = option.pd_enable_log_info;
  pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
```
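The pd_* fields copied here are set on RuntimeOption by the caller; a sketch under the assumption that they are public members, as the code above suggests:

```cpp
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // assumed setter
  option.UsePaddleBackend();         // assumed backend selector
  option.pd_enable_mkldnn = true;    // fields shown above, copied into
  option.pd_mkldnn_cache_size = 10;  // PaddleBackendOption by CreatePaddleBackend()
  return 0;
}
```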
@@ -642,6 +621,7 @@ void Runtime::CreateOpenVINOBackend() {
```cpp
#ifdef ENABLE_OPENVINO_BACKEND
  auto ov_option = OpenVINOBackendOption();
  ov_option.cpu_thread_num = option.cpu_thread_num;
  ov_option.ov_num_streams = option.ov_num_streams;
  FDASSERT(option.model_format == ModelFormat::PADDLE ||
               option.model_format == ModelFormat::ONNX,
           "OpenVINOBackend only support model format of ModelFormat::PADDLE / "
```
@@ -699,6 +679,9 @@ void Runtime::CreateOrtBackend() {
```cpp
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  auto trt_option = TrtBackendOption();
  trt_option.model_file = option.model_file;
  trt_option.params_file = option.params_file;
  trt_option.model_format = option.model_format;
  trt_option.gpu_id = option.device_id;
  trt_option.enable_fp16 = option.trt_enable_fp16;
  trt_option.enable_int8 = option.trt_enable_int8;
```
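The TensorRT backend reads its settings from the same RuntimeOption. A sketch combining the fields above with SetTrtCacheFile() from this commit; UseGpu() and UseTrtBackend() are assumed from the existing API:

```cpp
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);  // assumed
  option.UseGpu(0);                     // assumed: ends up in option.device_id
  option.UseTrtBackend();               // assumed backend selector
  option.trt_enable_fp16 = true;        // field shown above
  option.SetTrtCacheFile("model.trt");  // caches the serialized engine
  return 0;
}
```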
@@ -771,4 +754,26 @@ void Runtime::CreateRKNPU2Backend() {
```cpp
#endif
}

Runtime* Runtime::Clone(void* stream, int device_id) {
  Runtime* runtime = new Runtime();
  if (option.backend != Backend::OPENVINO &&
      option.backend != Backend::PDINFER &&
      option.backend != Backend::TRT) {
    runtime->Init(option);
    FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support clone engine "
                 "to reduce CPU/GPU memory usage now. For "
              << option.backend
              << ", FastDeploy will create a new engine which will not share "
                 "memory with the current runtime."
              << std::endl;
    return runtime;
  }
  FDINFO << "Runtime Clone with Backend: " << Str(option.backend) << " in "
         << Str(option.device) << "." << std::endl;
  runtime->option = option;
  runtime->backend_ = backend_->Clone(stream, device_id);
  return runtime;
}

}  // namespace fastdeploy
```
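Runtime::Clone() is the user-facing piece of this commit: for the OpenVINO, Paddle Inference, and TensorRT backends the clone shares the loaded engine instead of rebuilding it. A minimal sketch of cloning one runtime per worker; Init() and the "reuse current" argument values are assumptions about the existing API:

```cpp
#include <memory>
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // assumed setter
  option.UsePaddleBackend();  // one of the three backends that can share engines

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;

  // The clone shares the parent's engine, so the extra CPU/GPU memory is
  // roughly the cost of new I/O buffers, not a second copy of the weights.
  // stream = nullptr and device_id = -1 are assumed "reuse current" values.
  std::unique_ptr<fastdeploy::Runtime> worker(
      runtime.Clone(/*stream=*/nullptr, /*device_id=*/-1));
  return 0;
}
```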