[Model] Add Paddle3D smoke model (#1766)

* add smoke model * add 3d vis * update code * update doc * mv paddle3d from detection to perception * update result for velocity * update code for CI * add set input data for TRT backend * add serving support for smoke model * update code * update code * update code --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
2025-10-08 10:00:29 +08:00 · 2023-04-14 16:30:56 +08:00
parent 81fbd54c9d
commit a509dd8ec1
53 changed files with 2610 additions and 26 deletions
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -226,10 +226,17 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
      std::map<std::string, std::vector<int>> min_shape;
      std::map<std::string, std::vector<int>> opt_shape;
      GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
+      std::map<std::string, std::vector<float>> max_input_data;
+      std::map<std::string, std::vector<float>> min_input_data;
+      std::map<std::string, std::vector<float>> opt_input_data;
+      if (!option.trt_option.min_input_data.empty()) {
+        GetInputDataFromOption(option, &max_input_data, &min_input_data,
+                               &opt_input_data);
+      }
      // Need to run once to get the shape range info file.
-      CollectShapeRun(predictor_tmp.get(), max_shape);
-      CollectShapeRun(predictor_tmp.get(), min_shape);
-      CollectShapeRun(predictor_tmp.get(), opt_shape);
+      CollectShapeRun(predictor_tmp.get(), max_shape, max_input_data);
+      CollectShapeRun(predictor_tmp.get(), min_shape, min_input_data);
+      CollectShapeRun(predictor_tmp.get(), opt_shape, min_input_data);
      FDINFO << "Finish generating shape range info file." << std::endl;
    }
    FDINFO << "Start loading shape range info file " << shape_range_info
@@ -400,9 +407,33 @@ void PaddleBackend::GetDynamicShapeFromOption(
  }
 }

+void PaddleBackend::GetInputDataFromOption(
+    const PaddleBackendOption& option,
+    std::map<std::string, std::vector<float>>* max_input_data,
+    std::map<std::string, std::vector<float>>* min_input_data,
+    std::map<std::string, std::vector<float>>* opt_input_data) const {
+  for (const auto& item : option.trt_option.min_input_data) {
+    auto max_iter = option.trt_option.max_input_data.find(item.first);
+    auto opt_iter = option.trt_option.opt_input_data.find(item.first);
+    FDASSERT(max_iter != option.trt_option.max_input_data.end(),
+             "Cannot find %s in TrtBackendOption::min_input_data.",
+             item.first.c_str());
+    FDASSERT(opt_iter != option.trt_option.opt_input_data.end(),
+             "Cannot find %s in TrtBackendOption::opt_input_data.",
+             item.first.c_str());
+    (*max_input_data)[item.first].assign(max_iter->second.begin(),
+                                         max_iter->second.end());
+    (*opt_input_data)[item.first].assign(opt_iter->second.begin(),
+                                         opt_iter->second.end());
+    (*min_input_data)[item.first].assign(item.second.begin(),
+                                         item.second.end());
+  }
+}
+
 void PaddleBackend::CollectShapeRun(
    paddle_infer::Predictor* predictor,
-    const std::map<std::string, std::vector<int>>& shape) const {
+    const std::map<std::string, std::vector<int>>& shape,
+    const std::map<std::string, std::vector<float>>& data) const {
  auto input_names = predictor->GetInputNames();
  auto input_type = predictor->GetInputTypes();
  for (const auto& name : input_names) {
@@ -418,21 +449,47 @@ void PaddleBackend::CollectShapeRun(
    int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1,
                                    std::multiplies<int>());
    tensor->Reshape(shape_value);
+
+    if (data.find(name) != data.end()) {
+      FDASSERT(data.at(name).size() == shape_num,
+               "The data num and accumulate of shape must be equal for input: "
+               "[\"%s\"], "
+               " When Use the (C++)RuntimeOption.trt_option.SetInputData/ "
+               " (Python)RuntimeOption.trt_option.set_input_data/",
+               name.c_str());
+    }
+
    auto dtype = input_type[name];
    switch (dtype) {
      case paddle_infer::DataType::FLOAT32: {
-        std::vector<float> input_data(shape_num, 1.0);
-        tensor->CopyFromCpu(input_data.data());
+        if (data.find(name) != data.end()) {
+          tensor->CopyFromCpu(data.at(name).data());
+        } else {
+          std::vector<float> input_data(shape_num, 1.0);
+          tensor->CopyFromCpu(input_data.data());
+        }
        break;
      }
      case paddle_infer::DataType::INT32: {
-        std::vector<int> input_data(shape_num, 1);
-        tensor->CopyFromCpu(input_data.data());
+        if (data.find(name) != data.end()) {
+          std::vector<int> input_data(data.at(name).begin(),
+                                      data.at(name).end());
+          tensor->CopyFromCpu(input_data.data());
+        } else {
+          std::vector<int> input_data(shape_num, 1);
+          tensor->CopyFromCpu(input_data.data());
+        }
        break;
      }
      case paddle_infer::DataType::INT64: {
-        std::vector<int64_t> input_data(shape_num, 1);
-        tensor->CopyFromCpu(input_data.data());
+        if (data.find(name) != data.end()) {
+          std::vector<int64_t> input_data(data.at(name).begin(),
+                                          data.at(name).end());
+          tensor->CopyFromCpu(input_data.data());
+        } else {
+          std::vector<int64_t> input_data(shape_num, 1);
+          tensor->CopyFromCpu(input_data.data());
+        }
        break;
      }
      default: {
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.h
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h
@@ -81,12 +81,18 @@ class PaddleBackend : public BaseBackend {

  void
  CollectShapeRun(paddle_infer::Predictor* predictor,
-                  const std::map<std::string, std::vector<int>>& shape) const;
+                  const std::map<std::string, std::vector<int>>& shape,
+                  const std::map<std::string, std::vector<float>>& data) const;
  void GetDynamicShapeFromOption(
      const PaddleBackendOption& option,
      std::map<std::string, std::vector<int>>* max_shape,
      std::map<std::string, std::vector<int>>* min_shape,
      std::map<std::string, std::vector<int>>* opt_shape) const;
+  void GetInputDataFromOption(
+    const PaddleBackendOption& option,
+    std::map<std::string, std::vector<float>>* max_input_data,
+    std::map<std::string, std::vector<float>>* min_input_data,
+    std::map<std::string, std::vector<float>>* opt_input_data) const;
  void SetTRTDynamicShapeToConfig(const PaddleBackendOption& option);
  PaddleBackendOption option_;
  paddle_infer::Config config_;
--- a/fastdeploy/runtime/backends/tensorrt/option.h
+++ b/fastdeploy/runtime/backends/tensorrt/option.h
@@ -33,8 +33,9 @@ struct TrtBackendOption {
  /// Enable log while converting onnx model to tensorrt
  bool enable_log_info = false;

-  
-  /// Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
+
+  /// Enable half precison inference, on some device not support half precision,
+  /// it will fallback to float32 mode
  bool enable_fp16 = false;

  /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
@@ -63,9 +64,44 @@ struct TrtBackendOption {
      max_shape[tensor_name].assign(max.begin(), max.end());
    }
  }
-  /// Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
+
+    /** \brief Set Input data for input tensor for the model  while using TensorRT backend
+   *
+   * \param[in] tensor_name The name of input for the model which is dynamic shape
+   * \param[in] min_data The input data for minimal shape for the input tensor
+   * \param[in] opt_data The input data for optimized shape for the input tensor
+   * \param[in] max_data The input data for maximum shape for the input tensor, if set as default value, it will keep same with min_data
+   */
+  void SetInputData(const std::string& tensor_name,
+                const std::vector<float> min_data,
+                const std::vector<float> opt_data = std::vector<float>(),
+                const std::vector<float> max_data = std::vector<float>()) {
+    max_input_data[tensor_name].clear();
+    min_input_data[tensor_name].clear();
+    opt_input_data[tensor_name].clear();
+    min_input_data[tensor_name].assign(min_data.begin(), min_data.end());
+    if (opt_data.empty()) {
+      opt_input_data[tensor_name].assign(min_data.begin(), min_data.end());
+    } else {
+      opt_input_data[tensor_name].assign(opt_data.begin(), opt_data.end());
+    }
+    if (max_data.empty()) {
+      max_input_data[tensor_name].assign(min_data.begin(), min_data.end());
+    } else {
+      max_input_data[tensor_name].assign(max_data.begin(), max_data.end());
+    }
+  }
+
+  /// Set cache file path while use TensorRT backend.
+  /// Loadding a Paddle/ONNX model and initialize TensorRT will
+  /// take a long time,
+  /// by this interface it will save the tensorrt engine to `cache_file_path`,
+  /// and load it directly while execute the code again
  std::string serialize_file = "";

+  std::map<std::string, std::vector<float>> max_input_data;
+  std::map<std::string, std::vector<float>> min_input_data;
+  std::map<std::string, std::vector<float>> opt_input_data;
  // The below parameters may be removed in next version, please do not
  // visit or use them directly
  std::map<std::string, std::vector<int32_t>> max_shape;
--- a/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
+++ b/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
@@ -26,7 +26,8 @@ void BindTrtOption(pybind11::module& m) {
      .def_readwrite("max_workspace_size",
                     &TrtBackendOption::max_workspace_size)
      .def_readwrite("serialize_file", &TrtBackendOption::serialize_file)
-      .def("set_shape", &TrtBackendOption::SetShape);
+      .def("set_shape", &TrtBackendOption::SetShape)
+      .def("set_input_data", &TrtBackendOption::SetInputData);
 }

 }  // namespace fastdeploy
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -379,6 +379,17 @@ void RuntimeOption::SetTrtInputShape(const std::string& input_name,
  trt_option.SetShape(input_name, min_shape, opt_shape, max_shape);
 }

+void RuntimeOption::SetTrtInputData(const std::string& input_name,
+                                    const std::vector<float>& min_shape_data,
+                                    const std::vector<float>& opt_shape_data,
+                                    const std::vector<float>& max_shape_data) {
+  FDWARNING << "`RuntimeOption::SetTrtInputData` will be removed in v1.2.0, "
+               "please use `RuntimeOption.trt_option.SetInputData()` instead."
+            << std::endl;
+  trt_option.SetInputData(input_name, min_shape_data, opt_shape_data,
+                          max_shape_data);
+}
+
 void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
  FDWARNING << "`RuntimeOption::SetTrtMaxWorkspaceSize` will be removed in "
               "v1.2.0, please modify its member variable directly, e.g "
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -257,6 +257,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
      const std::string& input_name, const std::vector<int32_t>& min_shape,
      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
      const std::vector<int32_t>& max_shape = std::vector<int32_t>());
+
+  void SetTrtInputData(
+      const std::string& input_name, const std::vector<float>& min_shape_data,
+      const std::vector<float>& opt_shape_data = std::vector<float>(),
+      const std::vector<float>& max_shape_data = std::vector<float>());
+
  void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
  void SetTrtMaxBatchSize(size_t max_batch_size);
  void EnableTrtFP16();
--- a/fastdeploy/vision.h
+++ b/fastdeploy/vision.h
@@ -32,6 +32,7 @@
 #include "fastdeploy/vision/detection/contrib/yolov8/yolov8.h"
 #include "fastdeploy/vision/detection/contrib/yolox.h"
 #include "fastdeploy/vision/detection/contrib/rknpu2/model.h"
+#include "fastdeploy/vision/perception/paddle3d/smoke/smoke.h"
 #include "fastdeploy/vision/detection/ppdet/model.h"
 #include "fastdeploy/vision/facealign/contrib/face_landmark_1000.h"
 #include "fastdeploy/vision/facealign/contrib/pfld.h"
--- a/fastdeploy/vision/common/result.cc
+++ b/fastdeploy/vision/common/result.cc
@@ -166,6 +166,96 @@ std::string DetectionResult::Str() {
  return out;
 }

+// PerceptionResult -----------------------------------------------------
+PerceptionResult::PerceptionResult(const PerceptionResult& res) {
+  scores.assign(res.scores.begin(), res.scores.end());
+  label_ids.assign(res.label_ids.begin(), res.label_ids.end());
+  boxes.assign(res.boxes.begin(), res.boxes.end());
+  center.assign(res.center.begin(), res.center.end());
+  observation_angle.assign(res.observation_angle.begin(),
+                           res.observation_angle.end());
+  yaw_angle.assign(res.yaw_angle.begin(), res.yaw_angle.end());
+  velocity.assign(res.velocity.begin(), res.velocity.end());
+}
+
+PerceptionResult& PerceptionResult::operator=(PerceptionResult&& other) {
+  if (&other != this) {
+    scores = std::move(other.scores);
+    label_ids = std::move(other.label_ids);
+    boxes = std::move(other.boxes);
+    center = std::move(other.center);
+    observation_angle = std::move(other.observation_angle);
+    yaw_angle = std::move(other.yaw_angle);
+    velocity = std::move(other.velocity);
+  }
+  return *this;
+}
+
+void PerceptionResult::Free() {
+  std::vector<float>().swap(scores);
+  std::vector<int32_t>().swap(label_ids);
+  std::vector<std::array<float, 7>>().swap(boxes);
+  std::vector<std::array<float, 3>>().swap(center);
+  std::vector<float>().swap(observation_angle);
+  std::vector<float>().swap(yaw_angle);
+  std::vector<std::array<float, 3>>().swap(velocity);
+}
+
+void PerceptionResult::Clear() {
+  scores.clear();
+  label_ids.clear();
+  boxes.clear();
+  center.clear();
+  observation_angle.clear();
+  yaw_angle.clear();
+  velocity.clear();
+}
+
+void PerceptionResult::Reserve(int size) {
+  scores.reserve(size);
+  label_ids.reserve(size);
+  boxes.reserve(size);
+  center.reserve(size);
+  observation_angle.reserve(size);
+  yaw_angle.reserve(size);
+  velocity.reserve(size);
+}
+
+void PerceptionResult::Resize(int size) {
+  scores.resize(size);
+  label_ids.resize(size);
+  boxes.resize(size);
+  center.resize(size);
+  observation_angle.resize(size);
+  yaw_angle.resize(size);
+  velocity.resize(size);
+}
+
+std::string PerceptionResult::Str() {
+  std::string out;
+  out =
+      "PerceptionResult: [xmin, ymin, xmax, ymax, w, h, l, cx, cy, cz, "
+      "yaw_angle, "
+      "ob_angle, score, label_id]\n";
+  for (size_t i = 0; i < boxes.size(); ++i) {
+    out = out + std::to_string(boxes[i][0]) + "," +
+          std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) +
+          ", " + std::to_string(boxes[i][3]) + ", " +
+          std::to_string(boxes[i][4]) + ", " + std::to_string(boxes[i][5]) +
+          ", " + std::to_string(boxes[i][6]) + ", " +
+          std::to_string(center[i][0]) + ", " + std::to_string(center[i][1]) +
+          ", " + std::to_string(center[i][2]) + ", " +
+          std::to_string(yaw_angle[i]) + ", " +
+          std::to_string(observation_angle[i]) + ", " +
+          std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]);
+
+    out += "\n";
+  }
+  return out;
+}
+
+// PerceptionResult finished
+
 void KeyPointDetectionResult::Free() {
  std::vector<std::array<float, 2>>().swap(keypoints);
  std::vector<float>().swap(scores);
@@ -531,8 +621,8 @@ std::string OCRResult::Str() {
      out = out + "]";

      if (rec_scores.size() > 0) {
-        out = out + "rec text: " + text[n] + " rec score:" +
-              std::to_string(rec_scores[n]) + " ";
+        out = out + "rec text: " + text[n] +
+              " rec score:" + std::to_string(rec_scores[n]) + " ";
      }
      if (cls_labels.size() > 0) {
        out = out + "cls label: " + std::to_string(cls_labels[n]) +
@@ -546,8 +636,8 @@ std::string OCRResult::Str() {
             cls_scores.size() > 0) {
    std::string out;
    for (int i = 0; i < rec_scores.size(); i++) {
-      out = out + "rec text: " + text[i] + " rec score:" +
-            std::to_string(rec_scores[i]) + " ";
+      out = out + "rec text: " + text[i] +
+            " rec score:" + std::to_string(rec_scores[i]) + " ";
      out = out + "cls label: " + std::to_string(cls_labels[i]) +
            " cls score: " + std::to_string(cls_scores[i]);
      out = out + "\n";
@@ -566,8 +656,8 @@ std::string OCRResult::Str() {
             cls_scores.size() == 0) {
    std::string out;
    for (int i = 0; i < rec_scores.size(); i++) {
-      out = out + "rec text: " + text[i] + " rec score:" +
-            std::to_string(rec_scores[i]) + " ";
+      out = out + "rec text: " + text[i] +
+            " rec score:" + std::to_string(rec_scores[i]) + " ";
      out = out + "\n";
    }
    return out;
@@ -589,9 +679,9 @@ std::string HeadPoseResult::Str() {
  std::string out;

  out = "HeadPoseResult: [yaw, pitch, roll]\n";
-  out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + "pitch: " +
-        std::to_string(euler_angles[1]) + "\n" + "roll: " +
-        std::to_string(euler_angles[2]) + "\n";
+  out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" +
+        "pitch: " + std::to_string(euler_angles[1]) + "\n" +
+        "roll: " + std::to_string(euler_angles[2]) + "\n";
  return out;
 }

--- a/fastdeploy/vision/common/result.h
+++ b/fastdeploy/vision/common/result.h
@@ -140,6 +140,44 @@ struct FASTDEPLOY_DECL DetectionResult : public BaseResult {
  std::string Str();
 };

+/*! @brief Detection result structure for all the object detection models and instance segmentation models
+ */
+struct FASTDEPLOY_DECL PerceptionResult : public BaseResult {
+  PerceptionResult() = default;
+
+  std::vector<float> scores;
+
+  std::vector<int32_t> label_ids;
+  // xmin, ymin, xmax, ymax, h, w, l
+  std::vector<std::array<float, 7>> boxes;
+  // cx, cy, cz
+  std::vector<std::array<float, 3>> center;
+
+  std::vector<float> observation_angle;
+
+  std::vector<float> yaw_angle;
+  // vx, vy, vz
+  std::vector<std::array<float, 3>> velocity;
+
+  /// Copy constructor
+  PerceptionResult(const PerceptionResult& res);
+  /// Move assignment
+  PerceptionResult& operator=(PerceptionResult&& other);
+
+  /// Clear PerceptionResult
+  void Clear();
+
+  /// Clear PerceptionResult and free the memory
+  void Free();
+
+  void Reserve(int size);
+
+  void Resize(int size);
+
+  /// Debug function, convert the result to string to print
+  std::string Str();
+};
+
 /*! @brief KeyPoint Detection result structure for all the keypoint detection models
 */
 struct FASTDEPLOY_DECL KeyPointDetectionResult : public BaseResult {
--- a/fastdeploy/vision/perception/paddle3d/smoke/postprocessor.cc
+++ b/fastdeploy/vision/perception/paddle3d/smoke/postprocessor.cc
@@ -0,0 +1,58 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/perception/paddle3d/smoke/postprocessor.h"
+
+#include "fastdeploy/vision/utils/utils.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace perception {
+
+SmokePostprocessor::SmokePostprocessor() {}
+
+bool SmokePostprocessor::Run(const std::vector<FDTensor>& tensors,
+                             std::vector<PerceptionResult>* results) {
+  results->resize(1);
+  (*results)[0].Clear();
+  (*results)[0].Reserve(tensors[0].shape[0]);
+  if (tensors[0].dtype != FDDataType::FP32) {
+    FDERROR << "Only support post process with float32 data." << std::endl;
+    return false;
+  }
+  const float* data = reinterpret_cast<const float*>(tensors[0].Data());
+  auto result = &(*results)[0];
+  for (int i = 0; i < tensors[0].shape[0] * tensors[0].shape[1]; i += 14) {
+    // item 1       :  class
+    // item 2       :  observation angle α
+    // item 3 ~ 6   :  box2d x1, y1, x2, y2
+    // item 7 ~ 9   :  box3d h, w, l
+    // item 10 ~ 12 :  box3d bottom center x, y, z
+    // item 13      :  box3d yaw angle
+    // item 14      :  score
+    std::vector<float> vec(data + i, data + i + 14);
+    result->scores.push_back(vec[13]);
+    result->label_ids.push_back(vec[0]);
+    result->boxes.emplace_back(std::array<float, 7>{
+        vec[2], vec[3], vec[4], vec[5], vec[6], vec[7], vec[8]});
+    result->center.emplace_back(std::array<float, 3>{vec[9], vec[10], vec[11]});
+    result->observation_angle.push_back(vec[1]);
+    result->yaw_angle.push_back(vec[12]);
+  }
+  return true;
+}
+
+}  // namespace perception
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/perception/paddle3d/smoke/postprocessor.h
+++ b/fastdeploy/vision/perception/paddle3d/smoke/postprocessor.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace perception {
+/*! @brief Postprocessor object for Smoke serials model.
+ */
+class FASTDEPLOY_DECL SmokePostprocessor {
+ public:
+  /** \brief Create a postprocessor instance for Smoke serials model
+   */
+  SmokePostprocessor();
+
+  /** \brief Process the result of runtime and fill to PerceptionResult structure
+   *
+   * \param[in] tensors The inference result from runtime
+   * \param[in] result The output result of detection
+   * \param[in] ims_info The shape info list, record input_shape and output_shape
+   * \return true if the postprocess successed, otherwise false
+   */
+  bool Run(const std::vector<FDTensor>& tensors,
+      std::vector<PerceptionResult>* results);
+
+
+ protected:
+  float conf_threshold_;
+};
+
+}  // namespace perception
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/perception/paddle3d/smoke/preprocessor.cc
+++ b/fastdeploy/vision/perception/paddle3d/smoke/preprocessor.cc
@@ -0,0 +1,161 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/perception/paddle3d/smoke/preprocessor.h"
+
+#include "fastdeploy/function/concat.h"
+#include "yaml-cpp/yaml.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace perception {
+
+SmokePreprocessor::SmokePreprocessor(const std::string& config_file) {
+  config_file_ = config_file;
+  FDASSERT(BuildPreprocessPipelineFromConfig(),
+           "Failed to create Paddle3DDetPreprocessor.");
+  initialized_ = true;
+}
+
+bool SmokePreprocessor::BuildPreprocessPipelineFromConfig() {
+  processors_.clear();
+  YAML::Node cfg;
+  try {
+    cfg = YAML::LoadFile(config_file_);
+  } catch (YAML::BadFile& e) {
+    FDERROR << "Failed to load yaml file " << config_file_
+            << ", maybe you should check this file." << std::endl;
+    return false;
+  }
+
+  // read for preprocess
+  processors_.push_back(std::make_shared<BGR2RGB>());
+
+  bool has_permute = false;
+  for (const auto& op : cfg["Preprocess"]) {
+    std::string op_name = op["type"].as<std::string>();
+    if (op_name == "NormalizeImage") {
+      auto mean = op["mean"].as<std::vector<float>>();
+      auto std = op["std"].as<std::vector<float>>();
+      bool is_scale = true;
+      if (op["is_scale"]) {
+        is_scale = op["is_scale"].as<bool>();
+      }
+      std::string norm_type = "mean_std";
+      if (op["norm_type"]) {
+        norm_type = op["norm_type"].as<std::string>();
+      }
+      if (norm_type != "mean_std") {
+        std::fill(mean.begin(), mean.end(), 0.0);
+        std::fill(std.begin(), std.end(), 1.0);
+      }
+      processors_.push_back(std::make_shared<Normalize>(mean, std, is_scale));
+    } else if (op_name == "Resize") {
+      bool keep_ratio = op["keep_ratio"].as<bool>();
+      auto target_size = op["target_size"].as<std::vector<int>>();
+      int interp = op["interp"].as<int>();
+      FDASSERT(target_size.size() == 2,
+               "Require size of target_size be 2, but now it's %lu.",
+               target_size.size());
+      if (!keep_ratio) {
+        int width = target_size[1];
+        int height = target_size[0];
+        processors_.push_back(
+            std::make_shared<Resize>(width, height, -1.0, -1.0, interp, false));
+      } else {
+        int min_target_size = std::min(target_size[0], target_size[1]);
+        int max_target_size = std::max(target_size[0], target_size[1]);
+        std::vector<int> max_size;
+        if (max_target_size > 0) {
+          max_size.push_back(max_target_size);
+          max_size.push_back(max_target_size);
+        }
+        processors_.push_back(std::make_shared<ResizeByShort>(
+            min_target_size, interp, true, max_size));
+      }
+    } else if (op_name == "Permute") {
+      // Do nothing, do permute as the last operation
+      has_permute = true;
+      continue;
+    } else {
+      FDERROR << "Unexcepted preprocess operator: " << op_name << "."
+              << std::endl;
+      return false;
+    }
+  }
+  if (!disable_permute_) {
+    if (has_permute) {
+      // permute = cast<float> + HWC2CHW
+      processors_.push_back(std::make_shared<Cast>("float"));
+      processors_.push_back(std::make_shared<HWC2CHW>());
+    }
+  }
+
+  // Fusion will improve performance
+  FuseTransforms(&processors_);
+
+  input_k_data_ = cfg["k_data"].as<std::vector<float>>();
+  input_ratio_data_ = cfg["ratio_data"].as<std::vector<float>>();
+  return true;
+}
+
+bool SmokePreprocessor::Apply(FDMatBatch* image_batch,
+                              std::vector<FDTensor>* outputs) {
+  if (image_batch->mats->empty()) {
+    FDERROR << "The size of input images should be greater than 0."
+            << std::endl;
+    return false;
+  }
+  if (!initialized_) {
+    FDERROR << "The preprocessor is not initialized." << std::endl;
+    return false;
+  }
+  // There are 3 outputs, image, k_data, ratio_data
+  outputs->resize(3);
+  int batch = static_cast<int>(image_batch->mats->size());
+
+  // Allocate memory for k_data
+  (*outputs)[2].Resize({batch, 3, 3}, FDDataType::FP32);
+
+  // Allocate memory for ratio_data
+  (*outputs)[0].Resize({batch, 2}, FDDataType::FP32);
+
+  auto* k_data_ptr = reinterpret_cast<float*>((*outputs)[2].MutableData());
+
+  auto* ratio_data_ptr = reinterpret_cast<float*>((*outputs)[0].MutableData());
+
+  for (size_t i = 0; i < image_batch->mats->size(); ++i) {
+    FDMat* mat = &(image_batch->mats->at(i));
+    for (size_t j = 0; j < processors_.size(); ++j) {
+      if (!(*(processors_[j].get()))(mat)) {
+        FDERROR << "Failed to processs image:" << i << " in "
+                << processors_[j]->Name() << "." << std::endl;
+        return false;
+      }
+    }
+
+    memcpy(k_data_ptr + i * 9, input_k_data_.data(), 9 * sizeof(float));
+    memcpy(ratio_data_ptr + i * 2, input_ratio_data_.data(), 2 * sizeof(float));
+  }
+
+  FDTensor* tensor = image_batch->Tensor();
+  (*outputs)[1].SetExternalData(tensor->Shape(), tensor->Dtype(),
+                                tensor->Data(), tensor->device,
+                                tensor->device_id);
+  return true;
+}
+
+}  // namespace perception
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/perception/paddle3d/smoke/preprocessor.h
+++ b/fastdeploy/vision/perception/paddle3d/smoke/preprocessor.h
@@ -0,0 +1,61 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/vision/common/processors/manager.h"
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+namespace vision {
+
+namespace perception {
+/*! @brief Preprocessor object for Smoke serials model.
+ */
+class FASTDEPLOY_DECL SmokePreprocessor : public ProcessorManager  {
+ public:
+  SmokePreprocessor() = default;
+  /** \brief Create a preprocessor instance for Smoke model
+   *
+   * \param[in] config_file Path of configuration file for deployment, e.g smoke/infer_cfg.yml
+   */
+  explicit SmokePreprocessor(const std::string& config_file);
+
+  /** \brief Process the input image and prepare input tensors for runtime
+   *
+   * \param[in] images The input image data list, all the elements are returned by cv::imread()
+   * \param[in] outputs The output tensors which will feed in runtime
+   * \param[in] ims_info The shape info list, record input_shape and output_shape
+   * \return true if the preprocess successed, otherwise false
+   */
+  bool Apply(FDMatBatch* image_batch, std::vector<FDTensor>* outputs);
+
+ protected:
+  bool BuildPreprocessPipelineFromConfig();
+  std::vector<std::shared_ptr<Processor>> processors_;
+
+  bool disable_permute_ = false;
+
+  bool initialized_ = false;
+
+  std::string config_file_;
+
+  std::vector<float> input_k_data_;
+
+  std::vector<float> input_ratio_data_;
+};
+
+}  // namespace perception
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/perception/paddle3d/smoke/smoke.cc
+++ b/fastdeploy/vision/perception/paddle3d/smoke/smoke.cc
@@ -0,0 +1,82 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/perception/paddle3d/smoke/smoke.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace perception {
+
+Smoke::Smoke(const std::string& model_file, const std::string& params_file,
+             const std::string& config_file, const RuntimeOption& custom_option,
+             const ModelFormat& model_format)
+    : preprocessor_(config_file) {
+  valid_cpu_backends = {Backend::PDINFER};
+  valid_gpu_backends = {Backend::PDINFER};
+
+  runtime_option = custom_option;
+  runtime_option.model_format = model_format;
+  runtime_option.model_file = model_file;
+  runtime_option.params_file = params_file;
+  initialized = Initialize();
+}
+
+bool Smoke::Initialize() {
+  if (!InitRuntime()) {
+    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool Smoke::Predict(const cv::Mat& im, PerceptionResult* result) {
+  std::vector<PerceptionResult> results;
+  if (!BatchPredict({im}, &results)) {
+    return false;
+  }
+  if (results.size()) {
+    *result = std::move(results[0]);
+  }
+  return true;
+}
+
+bool Smoke::BatchPredict(const std::vector<cv::Mat>& images,
+                         std::vector<PerceptionResult>* results) {
+  std::vector<FDMat> fd_images = WrapMat(images);
+
+  if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) {
+    FDERROR << "Failed to preprocess the input image." << std::endl;
+    return false;
+  }
+
+  reused_input_tensors_[0].name = InputInfoOfRuntime(0).name;
+  reused_input_tensors_[1].name = InputInfoOfRuntime(1).name;
+  reused_input_tensors_[2].name = InputInfoOfRuntime(2).name;
+
+  if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
+    FDERROR << "Failed to inference by runtime." << std::endl;
+    return false;
+  }
+
+  if (!postprocessor_.Run(reused_output_tensors_, results)) {
+    FDERROR << "Failed to postprocess the inference results by runtime."
+            << std::endl;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace perception
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/perception/paddle3d/smoke/smoke.h
+++ b/fastdeploy/vision/perception/paddle3d/smoke/smoke.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.  //NOLINT
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/perception/paddle3d/smoke/preprocessor.h"
+#include "fastdeploy/vision/perception/paddle3d/smoke/postprocessor.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace perception {
+/*! @brief smoke model object used when to load a smoke model exported by smoke.
+ */
+class FASTDEPLOY_DECL Smoke : public FastDeployModel {
+ public:
+  /** \brief  Set path of model file and the configuration of runtime.
+   *
+   * \param[in] model_file Path of model file, e.g smoke/model.pdiparams
+   * \param[in] params_file Path of parameter file, e.g smoke/model.pdiparams, if the model format is ONNX, this parameter will be ignored
+   * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in "valid_cpu_backends"
+   * \param[in] model_format Model format of the loaded model, default is Paddle format
+   */
+  Smoke(const std::string& model_file, const std::string& params_file,
+         const std::string& config_file,
+         const RuntimeOption& custom_option = RuntimeOption(),
+         const ModelFormat& model_format = ModelFormat::PADDLE);
+
+  std::string ModelName() const { return "Paddle3D/smoke"; }
+
+  /** \brief Predict the perception result for an input image
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+   * \param[in] result The output perception result will be writen to this structure
+   * \return true if the prediction successed, otherwise false
+   */
+  virtual bool Predict(const cv::Mat& img, PerceptionResult* result);
+
+  /** \brief Predict the perception results for a batch of input images
+   *
+   * \param[in] imgs, The input image list, each element comes from cv::imread()
+   * \param[in] results The output perception result list
+   * \return true if the prediction successed, otherwise false
+   */
+  virtual bool BatchPredict(const std::vector<cv::Mat>& imgs,
+                            std::vector<PerceptionResult>* results);
+
+  /// Get preprocessor reference of Smoke
+  virtual SmokePreprocessor& GetPreprocessor() {
+    return preprocessor_;
+  }
+
+  /// Get postprocessor reference of Smoke
+  virtual SmokePostprocessor& GetPostprocessor() {
+    return postprocessor_;
+  }
+
+ protected:
+  bool Initialize();
+  SmokePreprocessor preprocessor_;
+  SmokePostprocessor postprocessor_;
+  bool initialized_ = false;
+};
+
+}  // namespace perception
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/perception/paddle3d/smoke/smoke_pybind.cc
+++ b/fastdeploy/vision/perception/paddle3d/smoke/smoke_pybind.cc
@@ -0,0 +1,92 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+void BindSmoke(pybind11::module& m) {
+  pybind11::class_<vision::perception::SmokePreprocessor,
+                   vision::ProcessorManager>(m, "SmokePreprocessor")
+      .def(pybind11::init<std::string>())
+      .def("run", [](vision::perception::SmokePreprocessor& self,
+                     std::vector<pybind11::array>& im_list) {
+        std::vector<vision::FDMat> images;
+        for (size_t i = 0; i < im_list.size(); ++i) {
+          images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+        }
+        std::vector<FDTensor> outputs;
+        if (!self.Run(&images, &outputs)) {
+          throw std::runtime_error(
+              "Failed to preprocess the input data in SmokePreprocessor.");
+        }
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          outputs[i].StopSharing();
+        }
+        return outputs;
+      });
+
+  pybind11::class_<vision::perception::SmokePostprocessor>(m,
+                                                           "SmokePostprocessor")
+      .def(pybind11::init<>())
+      .def("run",
+           [](vision::perception::SmokePostprocessor& self,
+              std::vector<FDTensor>& inputs) {
+             std::vector<vision::PerceptionResult> results;
+             if (!self.Run(inputs, &results)) {
+               throw std::runtime_error(
+                   "Failed to postprocess the runtime result in "
+                   "SmokePostprocessor.");
+             }
+             return results;
+           })
+      .def("run", [](vision::perception::SmokePostprocessor& self,
+                     std::vector<pybind11::array>& input_array) {
+        std::vector<vision::PerceptionResult> results;
+        std::vector<FDTensor> inputs;
+        PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
+        if (!self.Run(inputs, &results)) {
+          throw std::runtime_error(
+              "Failed to postprocess the runtime result in "
+              "SmokePostprocessor.");
+        }
+        return results;
+      });
+
+  pybind11::class_<vision::perception::Smoke, FastDeployModel>(m, "Smoke")
+      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
+                          ModelFormat>())
+      .def("predict",
+           [](vision::perception::Smoke& self, pybind11::array& data) {
+             auto mat = PyArrayToCvMat(data);
+             vision::PerceptionResult res;
+             self.Predict(mat, &res);
+             return res;
+           })
+      .def("batch_predict",
+           [](vision::perception::Smoke& self,
+              std::vector<pybind11::array>& data) {
+             std::vector<cv::Mat> images;
+             for (size_t i = 0; i < data.size(); ++i) {
+               images.push_back(PyArrayToCvMat(data[i]));
+             }
+             std::vector<vision::PerceptionResult> results;
+             self.BatchPredict(images, &results);
+             return results;
+           })
+      .def_property_readonly("preprocessor",
+                             &vision::perception::Smoke::GetPreprocessor)
+      .def_property_readonly("postprocessor",
+                             &vision::perception::Smoke::GetPostprocessor);
+}
+}  // namespace fastdeploy
--- a/fastdeploy/vision/perception/perception_pybind.cc
+++ b/fastdeploy/vision/perception/perception_pybind.cc
@@ -0,0 +1,26 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+
+void BindSmoke(pybind11::module& m);
+
+void BindPerception(pybind11::module& m) {
+  auto perception_module =
+      m.def_submodule("perception", "3D object perception models.");
+  BindSmoke(perception_module);
+}
+}  // namespace fastdeploy
--- a/fastdeploy/vision/vision_pybind.cc
+++ b/fastdeploy/vision/vision_pybind.cc
@@ -33,6 +33,7 @@ void BindHeadPose(pybind11::module& m);
 void BindSR(pybind11::module& m);
 void BindGeneration(pybind11::module& m);
 void BindVisualize(pybind11::module& m);
+void BindPerception(pybind11::module& m);

 void BindVision(pybind11::module& m) {
  pybind11::class_<vision::Mask>(m, "Mask")
@@ -108,6 +109,40 @@ void BindVision(pybind11::module& m) {
      .def("__repr__", &vision::DetectionResult::Str)
      .def("__str__", &vision::DetectionResult::Str);

+  pybind11::class_<vision::PerceptionResult>(m, "PerceptionResult")
+      .def(pybind11::init())
+      .def_readwrite("scores", &vision::PerceptionResult::scores)
+      .def_readwrite("label_ids", &vision::PerceptionResult::label_ids)
+      .def_readwrite("boxes", &vision::PerceptionResult::boxes)
+      .def_readwrite("center", &vision::PerceptionResult::center)
+      .def_readwrite("observation_angle",
+                     &vision::PerceptionResult::observation_angle)
+      .def_readwrite("yaw_angle", &vision::PerceptionResult::yaw_angle)
+      .def_readwrite("velocity", &vision::PerceptionResult::velocity)
+      .def(pybind11::pickle(
+          [](const vision::PerceptionResult& d) {
+            return pybind11::make_tuple(d.scores, d.label_ids, d.boxes,
+                                        d.center, d.observation_angle,
+                                        d.yaw_angle, d.velocity);
+          },
+          [](pybind11::tuple t) {
+            if (t.size() != 7)
+              throw std::runtime_error(
+                  "vision::PerceptionResult pickle with Invalid state!");
+
+            vision::PerceptionResult d;
+            d.scores = t[0].cast<std::vector<float>>();
+            d.label_ids = t[1].cast<std::vector<int32_t>>();
+            d.boxes = t[2].cast<std::vector<std::array<float, 7>>>();
+            d.center = t[3].cast<std::vector<std::array<float, 3>>>();
+            d.observation_angle = t[4].cast<std::vector<float>>();
+            d.yaw_angle = t[5].cast<std::vector<float>>();
+            d.velocity = t[6].cast<std::vector<std::array<float, 3>>>();
+            return d;
+          }))
+      .def("__repr__", &vision::PerceptionResult::Str)
+      .def("__str__", &vision::PerceptionResult::Str);
+
  pybind11::class_<vision::OCRResult>(m, "OCRResult")
      .def(pybind11::init())
      .def_readwrite("boxes", &vision::OCRResult::boxes)
@@ -224,5 +259,6 @@ void BindVision(pybind11::module& m) {
  BindSR(m);
  BindGeneration(m);
  BindVisualize(m);
+  BindPerception(m);
 }
 }  // namespace fastdeploy
--- a/fastdeploy/vision/visualize/perception.cc
+++ b/fastdeploy/vision/visualize/perception.cc
@@ -0,0 +1,194 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+
+#include "fastdeploy/vision/visualize/visualize.h"
+#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "yaml-cpp/yaml.h"
+
+namespace fastdeploy {
+namespace vision {
+
+using matrix = std::vector<std::vector<float>>;
+
+matrix Multiple(const matrix a, const matrix b) {
+  const int m = a.size();  // a rows
+  if (m == 0) {
+    matrix c;
+    return c;
+  }
+  if (a[0].size() != b.size()) {
+    FDERROR << "A[m,n] * B[p,q], n must equal to p." << std::endl;
+    matrix c;
+    return c;
+  }
+  const int n = a[0].size();  // a cols
+  const int p = b[0].size();  // b cols
+  matrix c(m, std::vector<float>(p, 0));
+  for (auto i = 0; i < m; i++) {
+    for (auto j = 0; j < p; j++) {
+      for (auto k = 0; k < n; k++) c[i][j] += a[i][k] * b[k][j];
+    }
+  }
+  return c;
+}
+
+cv::Mat VisPerception(const cv::Mat& im, const PerceptionResult& result,
+                      const std::string& config_file, float score_threshold,
+                      int line_size, float font_size) {
+  if (result.scores.empty()) {
+    return im;
+  }
+  YAML::Node cfg;
+  try {
+    cfg = YAML::LoadFile(config_file);
+  } catch (YAML::BadFile& e) {
+    FDERROR << "Failed to load yaml file " << config_file
+            << ", maybe you should check this file." << std::endl;
+    return im;
+  }
+
+  std::vector<int> target_size;
+  for (const auto& op : cfg["Preprocess"]) {
+    std::string op_name = op["type"].as<std::string>();
+    if (op_name == "Resize") {
+      target_size = op["target_size"].as<std::vector<int>>();
+    }
+  }
+
+  std::vector<float> vec_k_data = cfg["k_data"].as<std::vector<float>>();
+  if (vec_k_data.size() != 9) {
+    FDERROR
+        << "The K data load from the yaml file: " << config_file
+        << " is unexpected, the expected size is 9, but the loaded size is: "
+        << vec_k_data.size() << " ,maybe you should check this file."
+        << std::endl;
+    return im;
+  }
+  matrix k_data(3, std::vector<float>());
+  for (auto j = 0; j < 3; j++) {
+    k_data[j].insert(k_data[j].begin(), vec_k_data.begin() + j * 3,
+                     vec_k_data.begin() + j * 3 + 3);
+  }
+
+  std::vector<double> rvec = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
+  std::vector<double> tvec = {0, 0, 0};
+
+  matrix connect_line_id = {{1, 0}, {2, 7}, {3, 6}, {4, 5}, {1, 2}, {2, 3},
+                            {3, 4}, {4, 1}, {0, 7}, {7, 6}, {6, 5}, {5, 0}};
+
+  int max_label_id =
+      *std::max_element(result.label_ids.begin(), result.label_ids.end());
+  std::vector<int> color_map = GenerateColorMap(max_label_id);
+  int h = im.rows;
+  int w = im.cols;
+  cv::Mat vis_im = im.clone();
+  cv::resize(im, vis_im, cv::Size(target_size[1], target_size[0]), 0, 0, 0);
+  for (size_t i = 0; i < result.scores.size(); ++i) {
+    if (result.scores[i] < 0.5) {
+      continue;
+    }
+    float h = result.boxes[i][4];
+    float w = result.boxes[i][5];
+    float l = result.boxes[i][6];
+
+    float x = result.center[i][0];
+    float y = result.center[i][1];
+    float z = result.center[i][2];
+    std::vector<float> x_corners = {0, l, l, l, l, 0, 0, 0};
+    std::vector<float> y_corners = {0, 0, h, h, 0, 0, h, h};
+    std::vector<float> z_corners = {0, 0, 0, w, w, w, w, 0};
+
+    for (auto j = 0; j < x_corners.size(); j++) {
+      x_corners[j] = x_corners[j] - l / 2;
+      y_corners[j] = y_corners[j] - h;
+      z_corners[j] = z_corners[j] - w / 2;
+    }
+
+    matrix corners_3d = {x_corners, y_corners, z_corners};
+
+    float ry = result.yaw_angle[i];
+    matrix rot_mat = {
+        {cosf(ry), 0, sinf(ry)}, {0, 1, 0}, {sinf(ry), 0, cosf(ry)}};
+
+    matrix rot_corners_3d = Multiple(rot_mat, corners_3d);
+
+    for (auto j = 0; j < rot_corners_3d[0].size(); j++) {
+      rot_corners_3d[0][j] += x;
+      rot_corners_3d[1][j] += y;
+      rot_corners_3d[2][j] += z;
+    }
+
+    auto corners_2d = Multiple(k_data, rot_corners_3d);
+
+    for (auto j = 0; j < corners_2d[0].size(); j++) {
+      corners_2d[0][j] /= corners_2d[2][j];
+      corners_2d[1][j] /= corners_2d[2][j];
+    }
+
+    std::vector<float> box2d = {
+        *std::min_element(corners_2d[0].begin(), corners_2d[0].end()),
+        *std::min_element(corners_2d[1].begin(), corners_2d[1].end()),
+        *std::max_element(corners_2d[0].begin(), corners_2d[0].end()),
+        *std::max_element(corners_2d[1].begin(), corners_2d[1].end())};
+
+    if (box2d[0] == 0 && box2d[1] == 0 && box2d[2] == 0 && box2d[3] == 0) {
+      continue;
+    }
+
+    std::vector<cv::Point3f> points3d;
+    for (auto j = 0; j < rot_corners_3d[0].size(); j++) {
+      points3d.push_back(cv::Point3f(rot_corners_3d[0][j], rot_corners_3d[1][j],
+                                     rot_corners_3d[2][j]));
+    }
+    cv::Mat rVec(3, 3, cv::DataType<double>::type, rvec.data());
+    cv::Mat tVec(3, 1, cv::DataType<double>::type, tvec.data());
+    std::vector<float> vec_k;
+    for (auto&& v : k_data) {
+      vec_k.insert(vec_k.end(), v.begin(), v.end());
+    }
+    cv::Mat intrinsicMat(3, 3, cv::DataType<float>::type, vec_k.data());
+    cv::Mat distCoeffs(5, 1, cv::DataType<double>::type);
+    std::vector<cv::Point2f> projectedPoints;
+    cv::projectPoints(points3d, rVec, tVec, intrinsicMat, distCoeffs,
+                      projectedPoints);
+
+    int c0 = color_map[3 * result.label_ids[i] + 0];
+    int c1 = color_map[3 * result.label_ids[i] + 1];
+    int c2 = color_map[3 * result.label_ids[i] + 2];
+    cv::Scalar color = cv::Scalar(c0, c1, c2);
+    for (auto id = 0; id < connect_line_id.size(); id++) {
+      int p1 = connect_line_id[id][0];
+      int p2 = connect_line_id[id][1];
+      cv::line(vis_im, projectedPoints[p1], projectedPoints[p2], color, 1);
+    }
+    int font = cv::FONT_HERSHEY_SIMPLEX;
+    std::string score = std::to_string(result.scores[i]);
+    if (score.size() > 4) {
+      score = score.substr(0, 4);
+    }
+    std::string text = std::to_string(result.label_ids[i]) + ", " + score;
+    cv::Point2f original;
+    original.x = box2d[0];
+    original.y = box2d[1];
+    cv::putText(vis_im, text, original, font, font_size,
+                cv::Scalar(255, 255, 255), 1);
+  }
+  return vis_im;
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -32,6 +32,12 @@ class FASTDEPLOY_DECL Visualize {
  static cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
                              float score_threshold = 0.0, int line_size = 1,
                              float font_size = 0.5f);
+  static cv::Mat VisPerception(const cv::Mat& im,
+                     const PerceptionResult& result,
+                     const std::string & config_file,
+                     float score_threshold = 0.0,
+                     int line_size = 1,
+                     float font_size = 0.5f);
  static cv::Mat VisFaceDetection(const cv::Mat& im,
                                  const FaceDetectionResult& result,
                                  int line_size = 1, float font_size = 0.5f);
@@ -82,6 +88,22 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
                                     const std::vector<std::string>& labels,
                                     float score_threshold = 0.0,
                                     int line_size = 1, float font_size = 0.5f);
+/** \brief Show the visualized results with custom labels for detection models
+ *
+ * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
+ * \param[in] result the result produced by model
+ * \param[in] labels the visualized result will show the bounding box contain class label
+ * \param[in] score_threshold threshold for result scores, the bounding box will not be shown if the score is less than score_threshold
+ * \param[in] line_size line size for bounding boxes
+ * \param[in] font_size font size for text
+ * \return cv::Mat type stores the visualized results
+ */
+FASTDEPLOY_DECL cv::Mat VisPerception(const cv::Mat& im,
+                     const PerceptionResult& result,
+                     const std::string & config_file,
+                     float score_threshold = 0.0,
+                     int line_size = 1 ,
+                     float font_size = 0.5f);
 /** \brief Show the visualized results for classification models
 *
 * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
--- a/fastdeploy/vision/visualize/visualize_pybind.cc
+++ b/fastdeploy/vision/visualize/visualize_pybind.cc
@@ -33,6 +33,18 @@ void BindVisualize(pybind11::module& m) {
          vision::Mat(vis_im).ShareWithTensor(&out);
          return TensorToPyArray(out);
        })
+      .def("vis_perception",
+           [](pybind11::array& im_data, vision::PerceptionResult& result,
+              const std::string& config_file, float score_threshold,
+              int line_size, float font_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im =
+                 vision::VisPerception(im, result, config_file, score_threshold,
+                                       line_size, font_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
      .def("vis_face_detection",
           [](pybind11::array& im_data, vision::FaceDetectionResult& result,
              int line_size, float font_size) {