[backend][Serving] Fix Paddle backend get output tensor error (#741)

Fix Paddle backend no_copy_infer.
heliqi
2022-11-29 18:34:56 +08:00
committed by GitHub
parent b96c8a4146
commit 5dca3a45c9
7 changed files with 133 additions and 20 deletions


@@ -87,14 +87,29 @@ void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
   int size = 0;
   // TODO(liqi): The tensor->data interface of paddle don't return device id
   // and don't support return void*.
-  auto* out_data = tensor->data<uint8_t>(&place, &size);
+  void* out_data = nullptr;
+  if (fd_dtype == FDDataType::FP32) {
+    out_data = tensor->data<float>(&place, &size);
+  } else if (fd_dtype == FDDataType::INT32) {
+    out_data = tensor->data<int>(&place, &size);
+  } else if (fd_dtype == FDDataType::INT64) {
+    out_data = tensor->data<int64_t>(&place, &size);
+  } else if (fd_dtype == FDDataType::INT8) {
+    out_data = tensor->data<int8_t>(&place, &size);
+  } else if (fd_dtype == FDDataType::UINT8) {
+    out_data = tensor->data<uint8_t>(&place, &size);
+  } else {
+    FDASSERT(false, "Unexpected data type(%s) while infer shared with PaddleBackend.",
+             Str(fd_dtype).c_str());
+  }
   Device device = Device::CPU;
   if(place == paddle_infer::PlaceType::kGPU) {
     device = Device::GPU;
   }
   fd_tensor->name = tensor->name();
   fd_tensor->SetExternalData(
       shape, fd_dtype,
-      reinterpret_cast<void*>(out_data), device);
+      out_data, device);
}
}
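Why the dispatch matters: the old path always requested tensor->data<uint8_t>() regardless of the tensor's real element type, so a FP32 or INT64 output either trips Paddle's dtype check or hands back a byte-reinterpreted buffer. A numpy analogy of that reinterpretation hazard (illustration only, not the backend code itself):

import numpy as np

# A float32 output buffer like the one the Paddle backend returns.
out = np.array([1.0, 2.0, 3.0], dtype=np.float32)

# Viewing the same bytes as uint8 (what a blanket data<uint8_t>() amounts
# to) yields 12 raw IEEE-754 bytes instead of 3 floats.
raw = out.view(np.uint8)
print(raw.shape)  # (12,)
print(raw[:4])    # [  0   0 128  63], the bytes of 1.0f, not the value 1.0

# Asking for the buffer through its true element type, as the fix does,
# leaves the data intact.
assert np.array_equal(out.view(np.float32), out)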


@@ -181,7 +181,8 @@ void BindFDTensor(pybind11::module& m) {
.def("from_numpy", [](FDTensor& self, pybind11::array& pyarray, bool share_buffer = false) {
PyArrayToTensor(pyarray, &self, share_buffer);
})
.def("to_dlpack", &FDTensorToDLPack);
.def("to_dlpack", &FDTensorToDLPack)
.def("print_info", &FDTensor::PrintInfo);
}
} // namespace fastdeploy
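The new print_info binding gives Python callers a quick way to inspect an FDTensor when debugging the shared-memory path. A minimal sketch; the construction path fd.C.FDTensor is an assumption (the docstrings below refer to the type as fastdeploy.FDTensor):

import numpy as np
import fastdeploy as fd

t = fd.C.FDTensor()  # assumed constructor path for the pybind class
t.from_numpy(np.zeros((1, 3), dtype=np.float32), True)  # share_buffer=True
t.print_info()           # binding added by this commit; prints the tensor's metadata
capsule = t.to_dlpack()  # existing binding, now chained before print_info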


@@ -128,12 +128,6 @@ void BindRuntime(pybind11::module& m) {
             }
             return self.Compile(warm_tensors, _option);
           })
-      .def("infer",
-           [](Runtime& self, std::vector<FDTensor>& inputs) {
-             std::vector<FDTensor> outputs(self.NumOutputs());
-             self.Infer(inputs, &outputs);
-             return outputs;
-           })
      .def("infer",
           [](Runtime& self, std::map<std::string, pybind11::array>& data) {
             std::vector<FDTensor> inputs(data.size());
@@ -185,6 +179,17 @@ void BindRuntime(pybind11::module& m) {
             std::vector<FDTensor> outputs;
             return self.Infer(inputs, &outputs);
           })
+      .def("bind_input_tensor", &Runtime::BindInputTensor)
+      .def("infer", [](Runtime& self) {
+        self.Infer();
+      })
+      .def("get_output_tensor", [](Runtime& self, const std::string& name) {
+        FDTensor* output = self.GetOutputTensor(name);
+        if(output == nullptr) {
+          return pybind11::cast(nullptr);
+        }
+        return pybind11::cast(*output);
+      })
      .def("num_inputs", &Runtime::NumInputs)
      .def("num_outputs", &Runtime::NumOutputs)
      .def("get_input_info", &Runtime::GetInputInfo)


@@ -610,6 +610,7 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
      return &t;
    }
  }
+  FDWARNING << "The output name [" << name << "] don't exist." << std::endl;
  return nullptr;
}


@@ -57,11 +57,36 @@ class Runtime:
"""
assert isinstance(data, dict) or isinstance(
data, list), "The input data should be type of dict or list."
if isinstance(data, dict):
for k, v in data.items():
if not v.data.contiguous:
if isinstance(v, np.ndarray) and not v.data.contiguous:
data[k] = np.ascontiguousarray(data[k])
return self._runtime.infer(data)
def bind_input_tensor(self, name, fdtensor):
"""Bind FDTensor by name, no copy and share input memory
:param name: (str)The name of input data.
:param fdtensor: (fastdeploy.FDTensor)The input FDTensor.
"""
self._runtime.bind_input_tensor(name, fdtensor)
def zero_copy_infer(self):
"""No params inference the model.
the input and output data need to pass through the bind_input_tensor and get_output_tensor interfaces.
"""
self._runtime.infer()
def get_output_tensor(self, name):
"""Get output FDTensor by name, no copy and share backend output memory
:param name: (str)The name of output data.
:return fastdeploy.FDTensor
"""
return self._runtime.get_output_tensor(name)
def compile(self, warm_datas):
"""[Only for Poros backend] compile with prewarm data for poros
@@ -178,7 +203,8 @@ class RuntimeOption:
    @long_to_int.setter
    def long_to_int(self, value):
        assert isinstance(
-            value, bool), "The value to set `long_to_int` must be type of bool."
+            value,
+            bool), "The value to set `long_to_int` must be type of bool."
        self._option.long_to_int = value

    @use_nvidia_tf32.setter
@@ -434,7 +460,8 @@ class RuntimeOption:
                continue
            if hasattr(getattr(self._option, attr), "__call__"):
                continue
-            message += "  {} : {}\t\n".format(attr, getattr(self._option, attr))
+            message += "  {} : {}\t\n".format(attr,
+                                              getattr(self._option, attr))
        message.strip("\n")
        message += ")"
        return message
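Putting the three Runtime methods added above together, the zero-copy round trip looks as follows. A minimal sketch: the model paths, input shape, and tensor names are placeholders borrowed from the test at the end of this commit, and the fd.C.FDTensor construction path is an assumption.

import numpy as np
import fastdeploy as fd

option = fd.RuntimeOption()
option.set_model_path("model.pdmodel", "model.pdiparams")  # placeholder paths
option.use_paddle_infer_backend()
runtime = fd.Runtime(option)

# Build an input FDTensor and share its memory with the backend
# instead of copying (construction path assumed, shape a placeholder).
input_tensor = fd.C.FDTensor()
input_tensor.from_numpy(np.zeros((1, 3, 640, 640), dtype=np.float32), True)
runtime.bind_input_tensor("image", input_tensor)

# Inference with no arguments: data flows only through the bound tensors.
runtime.zero_copy_infer()

# Borrow the backend's output memory. An unknown name returns None and
# logs the FDWARNING added in Runtime::GetOutputTensor above.
out = runtime.get_output_tensor("concat_8.tmp_0")
if out is None:
    raise KeyError("unknown output tensor name")

In practice the bound tensors come from a preprocessor, as test_detection_faster_rcnn2 below demonstrates.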


@@ -37,7 +37,7 @@ nvidia-docker run -i --rm --name build_fd \
  nvcr.io/nvidia/tritonserver:21.10-py3-min \
  bash -c \
  'cd /workspace/fastdeploy/python;
-   rm -rf .setuptools-cmake-build dist;
+   rm -rf .setuptools-cmake-build dist build fastdeploy/libs/third_libs;
   apt-get update;
   apt-get install -y --no-install-recommends patchelf python3-dev python3-pip rapidjson-dev;
   ln -s /usr/bin/python3 /usr/bin/python;
@@ -75,7 +75,7 @@ docker run -i --rm --name build_fd \
  paddlepaddle/fastdeploy:21.10-cpu-only-buildbase \
  bash -c \
  'cd /workspace/fastdeploy/python;
-   rm -rf .setuptools-cmake-build dist;
+   rm -rf .setuptools-cmake-build dist build fastdeploy/libs/third_libs;
   ln -s /usr/bin/python3 /usr/bin/python;
   export WITH_GPU=OFF;
   export ENABLE_ORT_BACKEND=OFF;


@@ -66,6 +66,7 @@ def test_detection_faster_rcnn():
    # with open("faster_rcnn_baseline.pkl", "wb") as f:
    #     pickle.dump([np.array(result.boxes), np.array(result.scores), np.array(result.label_ids)], f)


def test_detection_faster_rcnn1():
    model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz"
    input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg"
@@ -84,14 +85,17 @@ def test_detection_faster_rcnn1():
    option = rc.test_option
    option.set_model_path(model_file, params_file)
    option.use_paddle_infer_backend()
-    runtime = fd.Runtime(option);
+    runtime = fd.Runtime(option)
    # compare diff
-    im1 = cv2.imread("./resources/000000014439.jpg")
-    input_tensors = preprocessor.run([im1])
-    output_tensors = runtime.infer({"image": input_tensors[0], "scale_factor": input_tensors[1], "im_shape": input_tensors[2]})
+    for i in range(2):
+        im1 = cv2.imread("./resources/000000014439.jpg")
+        input_tensors = preprocessor.run([im1])
+        output_tensors = runtime.infer({
+            "image": input_tensors[0],
+            "scale_factor": input_tensors[1],
+            "im_shape": input_tensors[2]
+        })
        results = postprocessor.run(output_tensors)
        result = results[0]
@@ -115,6 +119,66 @@ def test_detection_faster_rcnn1():
        assert diff_label_ids[scores > score_threshold].max(
        ) < 1e-04, "There's diff in label_ids."
+
+
+# test runtime.zero_copy_infer and bind_input_tensor get_output_tensor
+def test_detection_faster_rcnn2():
+    model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz"
+    input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg"
+    result_url = "https://bj.bcebos.com/fastdeploy/tests/data/faster_rcnn_baseline.pkl"
+    fd.download_and_decompress(model_url, "resources")
+    fd.download(input_url1, "resources")
+    fd.download(result_url, "resources")
+    model_path = "resources/faster_rcnn_r50_vd_fpn_2x_coco"
+    model_file = os.path.join(model_path, "model.pdmodel")
+    params_file = os.path.join(model_path, "model.pdiparams")
+    config_file = os.path.join(model_path, "infer_cfg.yml")
+    preprocessor = fd.vision.detection.PaddleDetPreprocessor(config_file)
+    postprocessor = fd.vision.detection.PaddleDetPostprocessor()
+
+    option = rc.test_option
+    option.set_model_path(model_file, params_file)
+    option.use_paddle_infer_backend()
+    runtime = fd.Runtime(option)
+
+    # compare diff
+    input_names = ["image", "scale_factor", "im_shape"]
+    output_names = ["concat_12.tmp_0", "concat_8.tmp_0"]
+    for i in range(2):
+        im1 = cv2.imread("./resources/000000014439.jpg")
+        input_tensors = preprocessor.run([im1.copy(), ])
+        for i, input_tensor in enumerate(input_tensors):
+            runtime.bind_input_tensor(input_names[i], input_tensor)
+        runtime.zero_copy_infer()
+        output_tensors = []
+        for name in output_names:
+            output_tensor = runtime.get_output_tensor(name)
+            output_tensors.append(output_tensor)
+        results = postprocessor.run(output_tensors)
+        result = results[0]
+
+        with open("resources/faster_rcnn_baseline.pkl", "rb") as f:
+            boxes, scores, label_ids = pickle.load(f)
+        pred_boxes = np.array(result.boxes)
+        pred_scores = np.array(result.scores)
+        pred_label_ids = np.array(result.label_ids)
+        diff_boxes = np.fabs(boxes - pred_boxes)
+        diff_scores = np.fabs(scores - pred_scores)
+        diff_label_ids = np.fabs(label_ids - pred_label_ids)
+
+        print(diff_boxes.max(), diff_scores.max(), diff_label_ids.max())
+        score_threshold = 0.0
+        assert diff_boxes[scores > score_threshold].max(
+        ) < 1e-04, "There's diff in boxes."
+        assert diff_scores[scores > score_threshold].max(
+        ) < 1e-04, "There's diff in scores."
+        assert diff_label_ids[scores > score_threshold].max(
+        ) < 1e-04, "There's diff in label_ids."


if __name__ == "__main__":
    test_detection_faster_rcnn()
    test_detection_faster_rcnn1()
+    test_detection_faster_rcnn2()