[Serving][Backend] Backend supports zero_copy_infer and Serving reduces output memory copies (#703)

* backend: add zero-copy infer interface * fix bug * fix bug * fix bug * paddle ipu
2025-10-16 05:30:58 +08:00 · 2022-11-28 14:07:53 +08:00
parent edcf150d33
commit 42f1888bb0
21 changed files with 254 additions and 109 deletions
--- a/fastdeploy/backends/ort/ort_backend.h
+++ b/fastdeploy/backends/ort/ort_backend.h
@@ -68,7 +68,8 @@ class OrtBackend : public BaseBackend {
                    bool from_memory_buffer = false);

  bool Infer(std::vector<FDTensor>& inputs,
-             std::vector<FDTensor>* outputs) override;
+             std::vector<FDTensor>* outputs,
+             bool copy_to_fd = true) override;

  int NumInputs() const override { return inputs_desc_.size(); }

@@ -92,7 +93,7 @@ class OrtBackend : public BaseBackend {
  Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
 #endif
  OrtBackendOption option_;
-  void CopyToCpu(const Ort::Value& value, FDTensor* tensor,
-                 const std::string& name);
+  void OrtValueToFDTensor(const Ort::Value& value, FDTensor* tensor,
+                          const std::string& name, bool copy_to_fd);
 };
 }  // namespace fastdeploy