[Backend & Serving] Serving and Runtime support Clone (#464)

* Add Clone support to Serving and Runtime

* Support the TRT, OpenVINO and Paddle backends

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: heliqi
Date: 2022-11-04 17:16:40 +08:00 (committed via GitHub)
Parent: 61634caf28
Commit: 277bec38c7
13 changed files with 343 additions and 150 deletions
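
For context, the Clone interface added here lets Serving create extra
Runtime instances that reuse an already initialized backend instead of
loading and building the model again. A minimal usage sketch, assuming
only the BaseBackend::Clone(void* stream, int device_id) signature shown
in the diff below; the surrounding setup is hypothetical:

// Hypothetical usage sketch -- not part of this commit.
cudaStream_t stream;
cudaStreamCreate(&stream);
// device_id -1 keeps the clone on the current GPU, so it shares the
// built TensorRT engine but gets its own execution context and stream.
std::unique_ptr<BaseBackend> cloned = backend->Clone(stream, /*device_id=*/-1);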


@@ -285,6 +285,7 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
     BuildTrtEngine();
   }
+  cudaSetDevice(option_.gpu_id);
   SetInputs(inputs);
   AllocateOutputsBuffer(outputs);
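
The cudaSetDevice() call added at the top of Infer matters once backends
can be cloned across GPUs: the active CUDA device is per-thread state, so
each call must re-bind the calling thread to the GPU this engine was built
for. An illustrative sketch (the names here are assumptions, not
FastDeploy API):

// Illustrative: the current CUDA device is thread-local state.
#include <cuda_runtime.h>

void RunOnClone(int gpu_id, cudaStream_t stream) {
  // Re-select this clone's GPU before touching its buffers or context;
  // another backend driven from the same thread may have switched devices.
  cudaSetDevice(gpu_id);
  // ... set inputs and enqueue the TensorRT context on `stream` ...
}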
@@ -356,13 +357,17 @@ void TrtBackend::GetInputOutputInfo() {
       outputs_device_buffer_[name] = FDDeviceBuffer(dtype);
       casted_output_tensors_[name] = FDTensor();
     }
+    io_name_index_[name] = i;
   }
   bindings_.resize(num_binds);
 }
 
 void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
   for (const auto& item : inputs) {
-    auto idx = engine_->getBindingIndex(item.name.c_str());
+    // auto idx = engine_->getBindingIndex(item.name.c_str());
+    auto iter = io_name_index_.find(item.name);
+    FDASSERT(iter != io_name_index_.end(), "TrtBackend::SetInputs: cannot find name: %s", item.name.c_str());
+    auto idx = iter->second;
     std::vector<int> shape(item.shape.begin(), item.shape.end());
     auto dims = ToDims(shape);
     context_->setBindingDimensions(idx, dims);
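
This hunk swaps the per-call engine_->getBindingIndex() string lookup for
io_name_index_, a name-to-binding-index map filled once in
GetInputOutputInfo(). A self-contained sketch of the pattern (the map type
and helper are assumptions; only the member name comes from the diff):

// Sketch of the name -> binding-index cache.
#include <cassert>
#include <map>
#include <string>

std::map<std::string, int> io_name_index;  // filled once per built engine

int BindingIndex(const std::string& name) {
  auto iter = io_name_index.find(name);
  assert(iter != io_name_index.end() && "binding name not found");
  return iter->second;  // no engine query on the hot Infer path
}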
@@ -410,7 +415,10 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs) {
     outputs->resize(outputs_desc_.size());
   }
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
+    // auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
+    auto idx_iter = io_name_index_.find(outputs_desc_[i].name);
+    FDASSERT(idx_iter != io_name_index_.end(), "TrtBackend::AllocateOutputsBuffer: cannot find name: %s", outputs_desc_[i].name.c_str());
+    auto idx = idx_iter->second;
     auto output_dims = context_->getBindingDimensions(idx);
     // find the original index of output
@@ -673,4 +681,47 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
   return infos;
 }
 
+std::unique_ptr<BaseBackend> TrtBackend::Clone(void* stream, int device_id) {
+  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
+  auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
+  if (device_id > 0 && device_id != option_.gpu_id) {
+    auto clone_option = option_;
+    clone_option.gpu_id = device_id;
+    clone_option.external_stream_ = stream;
+    if (option_.model_format == ModelFormat::ONNX) {
+      FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
+               "Clone model from ONNX failed while initializing TrtBackend.");
+    } else {
+      FDASSERT(casted_backend->InitFromPaddle(option_.model_file,
+                                              option_.params_file, clone_option),
+               "Clone model from Paddle failed while initializing TrtBackend.");
+    }
+    FDWARNING << "The target device id: " << device_id
+              << " is different from the current device id: " << option_.gpu_id
+              << ", so the clone cannot share memory with the current engine."
+              << std::endl;
+    return new_backend;
+  }
+  cudaSetDevice(option_.gpu_id);
+  casted_backend->option_.gpu_id = option_.gpu_id;
+  if (stream) {
+    casted_backend->stream_ = reinterpret_cast<cudaStream_t>(stream);
+  } else {
+    FDASSERT(cudaStreamCreate(&casted_backend->stream_) == 0,
+             "Error occurred while calling cudaStreamCreate() in TrtBackend::Clone.");
+  }
+  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
+  casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
+  casted_backend->outputs_order_.insert(outputs_order_.begin(),
+                                        outputs_order_.end());
+  casted_backend->shape_range_info_.insert(shape_range_info_.begin(),
+                                           shape_range_info_.end());
+  casted_backend->engine_ = engine_;
+  casted_backend->context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
+      casted_backend->engine_->createExecutionContext());
+  casted_backend->GetInputOutputInfo();
+  FDINFO << "TrtBackend clone finished." << std::endl;
+  return new_backend;
+}
+
 }  // namespace fastdeploy
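
To close, a hedged sketch of what Clone enables on the serving side: one
worker thread per backend, with the engine shared and the execution
contexts private. Infer's exact signature is inferred from the hunks
above, and the harness is hypothetical:

// Hypothetical serving harness -- not part of this commit.
#include <thread>
#include <vector>

void ServeWithClone(BaseBackend* original, std::vector<FDTensor>& in1,
                    std::vector<FDTensor>& in2) {
  // Same device, null stream: Clone() creates a fresh CUDA stream and
  // execution context but shares the built TensorRT engine.
  auto cloned = original->Clone(/*stream=*/nullptr, /*device_id=*/-1);
  std::vector<FDTensor> out1, out2;
  std::thread t1([&] { original->Infer(in1, &out1); });
  std::thread t2([&] { cloned->Infer(in2, &out2); });
  t1.join();
  t2.join();
}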