[Serving][Backend] Backend support zero_copy_infer and Serving reduce the output memory copy (#703)

* backend add zero copy infer interface

* fix bug

* fix bug

* fix bug

* paddle ipu
This commit is contained in:
heliqi
2022-11-28 14:07:53 +08:00
committed by GitHub
parent edcf150d33
commit 42f1888bb0
21 changed files with 254 additions and 109 deletions

View File

@@ -62,8 +62,11 @@ class BaseBackend {
virtual TensorInfo GetOutputInfo(int index) = 0;
virtual std::vector<TensorInfo> GetInputInfos() = 0;
virtual std::vector<TensorInfo> GetOutputInfos() = 0;
// If copy_to_fd is true, copy memory data to the FDTensor;
// otherwise share memory with the FDTensor (only Paddle, ORT, TRT and OpenVINO support it)
virtual bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) = 0;
std::vector<FDTensor>* outputs,
bool copy_to_fd = true) = 0;
virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
int device_id = -1) {
FDERROR << "Clone no support" << std::endl;