Refine cpp/python api of visualize with lots of deprecated apis (#303)

* fix patchelf * refine visualize api * Update CMakeLists.txt * refine visualize api * add libs directory * Create __init__.py
2025-10-05 16:48:03 +08:00 · 2022-09-29 14:06:45 +08:00
parent 543622efaf
commit 8ba4a26ebb
14 changed files with 550 additions and 23 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -66,6 +66,7 @@ option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF)
 option(WITH_LITE_STATIC "Use Paddle-Lite static lib for Android." OFF)

 # Please don't open this flag now, some bugs exists.
+# Only supported on Linux for now.
 # option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)

 # Whether to build fastdeploy with vision/text/... examples, only for testings.
@@ -298,10 +299,13 @@ endif()
 if(ENABLE_VISION)
  add_definitions(-DENABLE_VISION)
  if(ENABLE_OPENCV_CUDA)
-    add_definitions(-DENABLE_OPENCV_CUDA)
-    if(APPLE OR ANDROID OR IOS)
-      message(FATAL_ERROR "Cannot enable opencv with cuda in mac/ios/android os, please set -DENABLE_OPENCV_CUDA=OFF.")
+    if(NOT WITH_GPU)
+      message(FATAL_ERROR "ENABLE_OPENCV_CUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.")
    endif()
+    if(APPLE OR ANDROID OR IOS OR WIN32)
+      message(FATAL_ERROR "Cannot enable opencv with cuda in mac/ios/android/windows os, please set -DENABLE_OPENCV_CUDA=OFF.")
+    endif()
+    add_definitions(-DENABLE_OPENCV_CUDA)
  endif()
  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
  list(APPEND DEPEND_LIBS yaml-cpp)
--- a/fastdeploy/libs/init.py
+++ b/fastdeploy/libs/init.py
--- a/fastdeploy/vision/visualize/detection.cc
+++ b/fastdeploy/vision/visualize/detection.cc
@@ -14,12 +14,96 @@

 #ifdef ENABLE_VISION_VISUALIZE

+#include <algorithm>
+
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"

 namespace fastdeploy {
 namespace vision {

+// Draws detection results on a copy of `im` and returns the copy.
+//
+// For every box whose score is at least `score_threshold`, a rectangle of
+// thickness `line_size` is drawn in the color assigned to the box's label id,
+// and the text "<label_id>,<score>" (score truncated to 4 characters) is put
+// at the box's top-left corner with font scale `font_size`. When
+// `result.contain_masks` is set, the pixels covered by the instance mask are
+// blended 50/50 with a bright per-label color.
+//
+// NOTE(review): the mask blending addresses the image as 3 bytes per pixel,
+// so `im` is assumed to be CV_8UC3 -- confirm against callers.
+cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
+                     float score_threshold, int line_size, float font_size) {
+  if (result.contain_masks) {
+    FDASSERT(result.boxes.size() == result.masks.size(),
+             "The size of masks must be equal to the size of boxes, but now "
+             "%zu != %zu.",
+             result.boxes.size(), result.masks.size());
+  }
+  auto vis_im = im.clone();
+  if (result.label_ids.empty()) {
+    // Nothing to draw. This also keeps std::max_element() from returning the
+    // end iterator, which must not be dereferenced.
+    return vis_im;
+  }
+  int max_label_id =
+      *std::max_element(result.label_ids.begin(), result.label_ids.end());
+  // GenerateColorMap(n) only provides colors for labels in [0, n), so ask for
+  // one more entry than the largest label id that is looked up below.
+  std::vector<int> color_map = GenerateColorMap(max_label_id + 1);
+
+  for (size_t i = 0; i < result.boxes.size(); ++i) {
+    if (result.scores[i] < score_threshold) {
+      continue;
+    }
+    int x1 = static_cast<int>(result.boxes[i][0]);
+    int y1 = static_cast<int>(result.boxes[i][1]);
+    int x2 = static_cast<int>(result.boxes[i][2]);
+    int y2 = static_cast<int>(result.boxes[i][3]);
+    int box_h = y2 - y1;
+    int box_w = x2 - x1;
+    int c0 = color_map[3 * result.label_ids[i] + 0];
+    int c1 = color_map[3 * result.label_ids[i] + 1];
+    int c2 = color_map[3 * result.label_ids[i] + 2];
+    cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
+    std::string id = std::to_string(result.label_ids[i]);
+    std::string score = std::to_string(result.scores[i]);
+    if (score.size() > 4) {
+      score = score.substr(0, 4);
+    }
+    std::string text = id + "," + score;
+    int font = cv::FONT_HERSHEY_SIMPLEX;
+    cv::Point origin(x1, y1);
+    cv::Rect rect(x1, y1, box_w, box_h);
+    cv::rectangle(vis_im, rect, rect_color, line_size);
+    cv::putText(vis_im, text, origin, font, font_size,
+                cv::Scalar(255, 255, 255), 1);
+    // A degenerate box has no pixels to blend and cannot be a resize target.
+    if (result.contain_masks && box_w > 0 && box_h > 0) {
+      int mask_h = static_cast<int>(result.masks[i].shape[0]);
+      int mask_w = static_cast<int>(result.masks[i].shape[1]);
+      // cv::Mat needs a non-const pointer; the data is only read here.
+      int32_t* mask_raw_data = const_cast<int32_t*>(
+          static_cast<const int32_t*>(result.masks[i].Data()));
+      // Wrap the mask buffer without copying it.
+      cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data);
+      if ((mask_h != box_h) || (mask_w != box_w)) {
+        // Nearest-neighbour keeps the mask values unmixed.
+        // NOTE(review): bilinear resize appears not to be implemented for
+        // CV_32S data -- confirm for the OpenCV version in use.
+        cv::Mat resized_mask;
+        cv::resize(mask, resized_mask, cv::Size(box_w, box_h), 0, 0,
+                   cv::INTER_NEAREST);
+        mask = resized_mask;
+      }
+      // Use a bright color for the instance mask.
+      int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127;
+      int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127;
+      int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127;
+      // Blend in place, clipped to the image so that a box sticking out of
+      // the frame never causes an out-of-bounds access.
+      const int row_begin = std::max(y1, 0);
+      const int row_end = std::min(y2, vis_im.rows);
+      const int col_begin = std::max(x1, 0);
+      const int col_end = std::min(x2, vis_im.cols);
+      for (int row = row_begin; row < row_end; ++row) {
+        // Row pointers honour each matrix's real row stride; after a resize
+        // the mask is box_w wide, not mask_w.
+        const int32_t* mask_row = mask.ptr<int32_t>(row - y1);
+        uchar* vis_row = vis_im.ptr<uchar>(row);
+        for (int col = col_begin; col < col_end; ++col) {
+          if (mask_row[col - x1] == 0) {
+            continue;
+          }
+          uchar* pixel = vis_row + col * 3;
+          pixel[0] = cv::saturate_cast<uchar>(
+              static_cast<float>(mc0) * 0.5f +
+              static_cast<float>(pixel[0]) * 0.5f);
+          pixel[1] = cv::saturate_cast<uchar>(
+              static_cast<float>(mc1) * 0.5f +
+              static_cast<float>(pixel[1]) * 0.5f);
+          pixel[2] = cv::saturate_cast<uchar>(
+              static_cast<float>(mc2) * 0.5f +
+              static_cast<float>(pixel[2]) * 0.5f);
+        }
+      }
+    }
+  }
+  return vis_im;
+}
+
 // Default only support visualize num_classes <= 1000
 // If need to visualize num_classes > 1000
 // Please call Visualize::GetColorMap(num_classes) first
@@ -27,6 +111,10 @@ cv::Mat Visualize::VisDetection(const cv::Mat& im,
                                const DetectionResult& result,
                                float score_threshold, int line_size,
                                float font_size) {
+  FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisDetection is "
+               "deprecated, please use fastdeploy::vision:VisDetection "
+               "function instead."
+            << std::endl;
  if (result.contain_masks) {
    FDASSERT(result.boxes.size() == result.masks.size(),
             "The size of masks must be equal the size of boxes!");
--- a/fastdeploy/vision/visualize/face_detection.cc
+++ b/fastdeploy/vision/visualize/face_detection.cc
@@ -21,12 +21,69 @@ namespace fastdeploy {

 namespace vision {

+// Draws face detection results on a copy of `im` and returns the copy.
+//
+// Each box is outlined with thickness `line_size` in a color picked by the
+// box index, and its score (truncated to 4 characters) is written at the
+// box's top-left corner with font scale `font_size`. Landmarks are drawn as
+// filled circles of radius `line_size` in the box color, but only when
+// `result.landmarks` holds exactly `landmarks_per_face` points per box.
+cv::Mat VisFaceDetection(const cv::Mat& im, const FaceDetectionResult& result,
+                         int line_size, float font_size) {
+  // Box colors cycle through this many palette entries, so no more than that
+  // need to be generated.
+  const int kNumColors = 333;
+  const auto color_map = GenerateColorMap(kNumColors);
+
+  auto vis_im = im.clone();
+  bool vis_landmarks = false;
+  if ((result.landmarks_per_face > 0) &&
+      (result.boxes.size() * result.landmarks_per_face ==
+       result.landmarks.size())) {
+    vis_landmarks = true;
+  }
+  for (size_t i = 0; i < result.boxes.size(); ++i) {
+    cv::Rect rect(result.boxes[i][0], result.boxes[i][1],
+                  result.boxes[i][2] - result.boxes[i][0],
+                  result.boxes[i][3] - result.boxes[i][1]);
+    int color_id = static_cast<int>(i % kNumColors);
+    int c0 = color_map[3 * color_id + 0];
+    int c1 = color_map[3 * color_id + 1];
+    int c2 = color_map[3 * color_id + 2];
+    cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
+    std::string text = std::to_string(result.scores[i]);
+    if (text.size() > 4) {
+      text = text.substr(0, 4);
+    }
+    int font = cv::FONT_HERSHEY_SIMPLEX;
+    cv::Point origin(rect.x, rect.y);
+    cv::rectangle(vis_im, rect, rect_color, line_size);
+    cv::putText(vis_im, text, origin, font, font_size,
+                cv::Scalar(255, 255, 255), 1);
+    // Landmarks are optional; they share the color of their box.
+    if (vis_landmarks) {
+      const size_t per_face = static_cast<size_t>(result.landmarks_per_face);
+      for (size_t j = 0; j < per_face; ++j) {
+        const auto& point = result.landmarks[i * per_face + j];
+        cv::Point landmark(static_cast<int>(point[0]),
+                           static_cast<int>(point[1]));
+        cv::circle(vis_im, landmark, line_size, rect_color, -1);
+      }
+    }
+  }
+  return vis_im;
+}
+
 // Default only support visualize num_classes <= 1000
 // If need to visualize num_classes > 1000
 // Please call Visualize::GetColorMap(num_classes) first
 cv::Mat Visualize::VisFaceDetection(const cv::Mat& im,
                                    const FaceDetectionResult& result,
                                    int line_size, float font_size) {
+  FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisFaceDetection is "
+               "deprecated, please use fastdeploy::vision:VisFaceDetection "
+               "function instead."
+            << std::endl;
  auto color_map = GetColorMap();
  int h = im.rows;
  int w = im.cols;
--- a/fastdeploy/vision/visualize/matting_alpha.cc
+++ b/fastdeploy/vision/visualize/matting_alpha.cc
@@ -21,9 +21,59 @@
 namespace fastdeploy {
 namespace vision {

+// Composites `im` over a fixed background color using the alpha matte in
+// `result` and returns the composited image (CV_8UC3).
+//
+// The matte is taken from `result.alpha` with shape `result.shape`
+// (height, width) and resized to the image size when the two differ. With
+// `remove_small_connected_area` set, everything except the largest connected
+// region of the matte is cleared first. Asserts that `im` is non-empty and
+// has 3 channels.
+cv::Mat VisMatting(const cv::Mat& im, const MattingResult& result,
+                   bool remove_small_connected_area) {
+  FDASSERT((!im.empty()), "im can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels mat!");
+
+  auto vis_img = im.clone();
+  int out_h = static_cast<int>(result.shape[0]);
+  int out_w = static_cast<int>(result.shape[1]);
+  int height = im.rows;
+  int width = im.cols;
+  // Work on a private copy so the caller's result is never modified.
+  std::vector<float> alpha_copy(result.alpha.begin(), result.alpha.end());
+  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_copy.data());
+  if (remove_small_connected_area) {
+    alpha = RemoveSmallConnectedArea(alpha, 0.05f);
+  }
+  if ((out_h != height) || (out_w != width)) {
+    cv::resize(alpha, alpha, cv::Size(width, height));
+  }
+
+  if (vis_img.type() != CV_8UC3) {
+    vis_img.convertTo(vis_img, CV_8UC3);
+  }
+
+  // Source pixels are read from `vis_img`, not from `im`: the clone is always
+  // continuous and has just been converted to CV_8UC3, whereas `im` may be a
+  // strided ROI or use another depth. Each pixel is read before it is
+  // overwritten, so blending in place yields the same values.
+  uchar* vis_data = vis_img.data;
+  const float* alpha_data = reinterpret_cast<const float*>(alpha.data);
+  // Per-channel values of the fixed background color.
+  const float kBackground[3] = {153.f, 255.f, 120.f};
+
+  for (int i = 0; i < height; ++i) {
+    for (int j = 0; j < width; ++j) {
+      const size_t index = static_cast<size_t>(i) * width + j;
+      const float alpha_val = alpha_data[index];
+      uchar* pixel = vis_data + index * 3;
+      for (int c = 0; c < 3; ++c) {
+        pixel[c] = cv::saturate_cast<uchar>(
+            static_cast<float>(pixel[c]) * alpha_val +
+            (1.f - alpha_val) * kBackground[c]);
+      }
+    }
+  }
+  return vis_img;
+}
+
 cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im,
                                   const MattingResult& result,
                                   bool remove_small_connected_area) {
+  FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisMattingAlpha is "
+               "deprecated, please use fastdeploy::vision:VisMatting function "
+               "instead."
+            << std::endl;
  FDASSERT((!im.empty()), "im can't be empty!");
  FDASSERT((im.channels() == 3), "Only support 3 channels mat!");

--- a/fastdeploy/vision/visualize/ocr.cc
+++ b/fastdeploy/vision/visualize/ocr.cc
@@ -20,7 +20,30 @@
 namespace fastdeploy {
 namespace vision {

+// Returns a copy of `im` on which every box of `ocr_result` is outlined as a
+// closed green quadrilateral (thickness 2). Each box supplies its four
+// corners as consecutive (x, y) pairs.
+cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
+  cv::Mat canvas = im.clone();
+  const int kCorners = 4;
+
+  for (const auto &box : ocr_result.boxes) {
+    cv::Point corners[kCorners];
+    for (int k = 0; k < kCorners; ++k) {
+      corners[k] = cv::Point(static_cast<int>(box[2 * k]),
+                             static_cast<int>(box[2 * k + 1]));
+    }
+
+    // cv::polylines takes an array of polygons; only one is passed here.
+    const cv::Point *polygons[1] = {corners};
+    int vertex_counts[] = {kCorners};
+    cv::polylines(canvas, polygons, vertex_counts, 1, true, CV_RGB(0, 255, 0),
+                  2, 8, 0);
+  }
+
+  return canvas;
+}
+
 cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
+  FDWARNING
+      << "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, "
+         "please use fastdeploy::vision:VisOcr function instead."
+      << std::endl;
  auto vis_im = im.clone();

  for (int n = 0; n < ocr_result.boxes.size(); n++) {
--- a/fastdeploy/vision/visualize/remove_small_connnected_area.cc
+++ b/fastdeploy/vision/visualize/remove_small_connnected_area.cc
@@ -21,6 +21,49 @@
 namespace fastdeploy {
 namespace vision {

+// Keeps only the largest connected region of an alpha matte.
+//
+// `alpha_pred` is scaled by 255, binarized at 255 * `threshold`, cleaned with
+// a 3x3 morphological opening and split into 8-connected components. Every
+// pixel outside the largest component is set to 0 in a copy of `alpha_pred`,
+// which is returned. When no component besides the background is found,
+// `alpha_pred` itself is returned.
+// NOTE(review): rows are written through float pointers, so `alpha_pred` is
+// assumed to be CV_32FC1 -- confirm against callers.
+cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred, float threshold) {
+  cv::Mat gray;
+  alpha_pred.convertTo(gray, CV_8UC1, 255.f);
+  cv::Mat cleaned = alpha_pred.clone();
+
+  // e.g. 255 * 0.05 ~ 13
+  const unsigned int binary_threshold =
+      static_cast<unsigned int>(255.f * threshold);
+  cv::Mat binary;
+  cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY);
+
+  // Opening removes isolated noise before the components are labelled.
+  const cv::Mat kernel = cv::getStructuringElement(
+      cv::MORPH_ELLIPSE, cv::Size(3, 3), cv::Point(-1, -1));
+  cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel);
+
+  cv::Mat labels = cv::Mat::zeros(cleaned.size(), CV_32S);
+  cv::Mat stats;
+  cv::Mat centroids;
+  const int num_labels =
+      cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4);
+  if (num_labels <= 1) {
+    // Only the background label exists: nothing to remove.
+    return alpha_pred;
+  }
+
+  // Label 0 is the background; pick the largest of labels 1, 2, ...
+  // (the first one wins on ties).
+  int largest_id = 1;
+  int largest_area = stats.at<int>(largest_id, cv::CC_STAT_AREA);
+  for (int id = 2; id < num_labels; ++id) {
+    const int area = stats.at<int>(id, cv::CC_STAT_AREA);
+    if (area > largest_area) {
+      largest_area = area;
+      largest_id = id;
+    }
+  }
+
+  // Clear every pixel that does not belong to the largest component.
+  for (int row = 0; row < cleaned.rows; ++row) {
+    const int* label_row = labels.ptr<int>(row);
+    float* alpha_row = cleaned.ptr<float>(row);
+    for (int col = 0; col < cleaned.cols; ++col) {
+      if (label_row[col] == largest_id) {
+        continue;
+      }
+      alpha_row[col] = 0.f;
+    }
+  }
+  return cleaned;
+}
+
 cv::Mat Visualize::RemoveSmallConnectedArea(const cv::Mat& alpha_pred,
                                            float threshold) {
  cv::Mat gray, binary;
--- a/fastdeploy/vision/visualize/segmentation.cc
+++ b/fastdeploy/vision/visualize/segmentation.cc
@@ -21,8 +21,32 @@
 namespace fastdeploy {
 namespace vision {

+// Paints every pixel with the color of its label in `result.label_map`
+// (read row by row, `result.shape` giving height and width) and blends that
+// color image with `im`: `weight` for the colors, 1 - `weight` for `im`.
+// Returns the blended image.
+cv::Mat VisSegmentation(const cv::Mat& im, const SegmentationResult& result,
+                        float weight) {
+  const auto palette = GenerateColorMap(1000);
+  const int64_t rows = result.shape[0];
+  const int64_t cols = result.shape[1];
+  cv::Mat color_mask(rows, cols, CV_8UC3);
+
+  int64_t pixel_index = 0;
+  for (int r = 0; r < rows; r++) {
+    cv::Vec3b* out_row = color_mask.ptr<cv::Vec3b>(r);
+    for (int c = 0; c < cols; c++, pixel_index++) {
+      const int category_id = result.label_map[pixel_index];
+      // Three consecutive palette entries form the color of one label.
+      for (int channel = 0; channel < 3; channel++) {
+        out_row[c][channel] = palette[3 * category_id + channel];
+      }
+    }
+  }
+  cv::addWeighted(im, 1.0 - weight, color_mask, weight, 0, color_mask);
+  return color_mask;
+}
+
 cv::Mat Visualize::VisSegmentation(const cv::Mat& im,
                                   const SegmentationResult& result) {
+  FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisSegmentation is "
+               "deprecated, please use fastdeploy::vision:VisSegmentation "
+               "function instead."
+            << std::endl;
  auto color_map = GetColorMap();
  int64_t height = result.shape[0];
  int64_t width = result.shape[1];
--- a/fastdeploy/vision/visualize/swap_background.cc
+++ b/fastdeploy/vision/visualize/swap_background.cc
@@ -21,6 +21,104 @@
 namespace fastdeploy {
 namespace vision {

+// Composites `im` over `background` using the alpha matte in `result` and
+// returns the composited image (CV_8UC3, same size as `im`).
+//
+// The matte comes from `result.alpha` with shape `result.shape`
+// (height, width); matte and background are resized to the size of `im` when
+// they differ. With `remove_small_connected_area` set, everything except the
+// largest connected region of the matte is cleared first. Asserts that both
+// images are non-empty and have 3 channels.
+cv::Mat SwapBackground(const cv::Mat& im, const cv::Mat& background,
+                       const MattingResult& result,
+                       bool remove_small_connected_area) {
+  FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((background.channels() == 3),
+           "Only support 3 channels background image mat!");
+  cv::Mat vis_img = im.clone();
+  cv::Mat background_copy = background.clone();
+  int out_h = static_cast<int>(result.shape[0]);
+  int out_w = static_cast<int>(result.shape[1]);
+  int height = im.rows;
+  int width = im.cols;
+  // Work on a private copy so the caller's result is never modified.
+  std::vector<float> alpha_copy(result.alpha.begin(), result.alpha.end());
+  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_copy.data());
+  if (remove_small_connected_area) {
+    // Free function rather than the deprecated Visualize:: static method.
+    alpha = RemoveSmallConnectedArea(alpha, 0.05f);
+  }
+  if (vis_img.type() != CV_8UC3) {
+    vis_img.convertTo(vis_img, CV_8UC3);
+  }
+  if (background_copy.type() != CV_8UC3) {
+    background_copy.convertTo(background_copy, CV_8UC3);
+  }
+  if ((background_copy.rows != height) || (background_copy.cols != width)) {
+    // Resize the converted copy; resizing the original `background` here
+    // would throw away the CV_8UC3 conversion made just above.
+    cv::Mat resized_background;
+    cv::resize(background_copy, resized_background, cv::Size(width, height));
+    background_copy = resized_background;
+  }
+  if ((out_h != height) || (out_w != width)) {
+    cv::resize(alpha, alpha, cv::Size(width, height));
+  }
+  // Foreground pixels are read from `vis_img`, not from `im`: the clone is
+  // continuous and already CV_8UC3, whereas `im` may be a strided ROI or use
+  // another depth. Each value is read before it is overwritten.
+  uchar* vis_data = vis_img.data;
+  const uchar* background_data = background_copy.data;
+  const float* alpha_data = reinterpret_cast<const float*>(alpha.data);
+  for (int i = 0; i < height; ++i) {
+    for (int j = 0; j < width; ++j) {
+      const size_t index = static_cast<size_t>(i) * width + j;
+      const float alpha_val = alpha_data[index];
+      uchar* pixel = vis_data + index * 3;
+      const uchar* bg_pixel = background_data + index * 3;
+      for (int c = 0; c < 3; ++c) {
+        pixel[c] = cv::saturate_cast<uchar>(
+            static_cast<float>(pixel[c]) * alpha_val +
+            (1.f - alpha_val) * bg_pixel[c]);
+      }
+    }
+  }
+
+  return vis_img;
+}
+
+// Replaces the background of `im` with `background` using a segmentation
+// result and returns the new image (CV_8UC3, same size as `im`).
+//
+// A pixel whose label in `result.label_map` equals `background_label` takes
+// the background pixel; every other pixel keeps the value from `im`. The
+// background is resized to the size of `im` when they differ. Asserts that
+// both images are non-empty and have 3 channels.
+// NOTE(review): `result.label_map` is indexed as row * im.cols + col, so it
+// is assumed to match the size of `im` -- confirm against callers.
+cv::Mat SwapBackground(const cv::Mat& im, const cv::Mat& background,
+                       const SegmentationResult& result, int background_label) {
+  FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((background.channels() == 3),
+           "Only support 3 channels background image mat!");
+  cv::Mat vis_img = im.clone();
+  cv::Mat background_copy = background.clone();
+  int height = im.rows;
+  int width = im.cols;
+  if (vis_img.type() != CV_8UC3) {
+    vis_img.convertTo(vis_img, CV_8UC3);
+  }
+  if (background_copy.type() != CV_8UC3) {
+    background_copy.convertTo(background_copy, CV_8UC3);
+  }
+  if ((background_copy.rows != height) || (background_copy.cols != width)) {
+    // Resize the converted copy; resizing the original `background` here
+    // would throw away the CV_8UC3 conversion made just above.
+    cv::Mat resized_background;
+    cv::resize(background_copy, resized_background, cv::Size(width, height));
+    background_copy = resized_background;
+  }
+  // `vis_img` already holds the (converted) foreground, so only pixels that
+  // carry the background label need to be overwritten. This equals blending
+  // with a weight of exactly 1 or 0.
+  uchar* vis_data = vis_img.data;
+  const uchar* background_data = background_copy.data;
+  for (int i = 0; i < height; ++i) {
+    for (int j = 0; j < width; ++j) {
+      const size_t index = static_cast<size_t>(i) * width + j;
+      const int category_id = result.label_map[index];
+      if (category_id != background_label) {
+        continue;
+      }
+      for (int c = 0; c < 3; ++c) {
+        vis_data[index * 3 + c] = background_data[index * 3 + c];
+      }
+    }
+  }
+  return vis_img;
+}
+
 cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat& im,
                                         const cv::Mat& background,
                                         const MattingResult& result,
--- a/fastdeploy/vision/visualize/visualize.cc
+++ b/fastdeploy/vision/visualize/visualize.cc
@@ -21,6 +21,24 @@ namespace vision {
 int Visualize::num_classes_ = 0;
 std::vector<int> Visualize::color_map_ = std::vector<int>();

+static std::vector<int> global_fd_vis_color_map = std::vector<int>();
+
+// Builds a flat color table with three consecutive ints per class id
+// (`num_classes` * 3 entries). The bits of the class id are dealt out in
+// groups of three, one bit per channel, filling each channel from its most
+// significant bit downwards; class 0 therefore maps to (0, 0, 0).
+std::vector<int> GenerateColorMap(int num_classes) {
+  std::vector<int> palette(num_classes * 3, 0);
+  for (int class_id = 0; class_id < num_classes; ++class_id) {
+    int* channels = &palette[class_id * 3];
+    int shift = 7;
+    for (int bits = class_id; bits != 0; bits >>= 3, --shift) {
+      channels[0] |= ((bits >> 0) & 1) << shift;
+      channels[1] |= ((bits >> 1) & 1) << shift;
+      channels[2] |= ((bits >> 2) & 1) << shift;
+    }
+  }
+  return palette;
+}
+
 const std::vector<int>& Visualize::GetColorMap(int num_classes) {
  if (num_classes < num_classes_) {
    return color_map_;
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -20,6 +20,7 @@
 namespace fastdeploy {
 namespace vision {

+// This class is deprecated and will be removed; please do not use it.
 class FASTDEPLOY_DECL Visualize {
 public:
  static int num_classes_;
@@ -47,6 +48,31 @@ class FASTDEPLOY_DECL Visualize {
  static cv::Mat VisOcr(const cv::Mat& srcimg, const OCRResult& ocr_result);
 };

+// Builds a flat color table holding three consecutive ints per class id
+// (num_classes * 3 entries in total).
+std::vector<int> GenerateColorMap(int num_classes = 1000);
+// Returns a copy of `alpha_pred` in which only the largest connected region
+// (found after binarizing at 255 * threshold) is kept and every other pixel
+// is set to 0; returns `alpha_pred` itself when no region is found.
+cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred, float threshold);
+// Draws boxes, "<label_id>,<score>" texts and, if present, instance masks of
+// all detections scoring at least `score_threshold` on a copy of `im`.
+FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
+                                     const DetectionResult& result,
+                                     float score_threshold = 0.0,
+                                     int line_size = 1, float font_size = 0.5f);
+// Draws face boxes, their scores and, when available, their landmarks on a
+// copy of `im`.
+FASTDEPLOY_DECL cv::Mat VisFaceDetection(const cv::Mat& im,
+                                         const FaceDetectionResult& result,
+                                         int line_size = 1,
+                                         float font_size = 0.5f);
+// Colors every pixel by its label and blends the colors with `im`, using
+// `weight` for the colors and 1 - `weight` for the image.
+FASTDEPLOY_DECL cv::Mat VisSegmentation(const cv::Mat& im,
+                                        const SegmentationResult& result,
+                                        float weight = 0.5);
+// Composites `im` over a fixed background color using the alpha matte of
+// `result`.
+FASTDEPLOY_DECL cv::Mat VisMatting(const cv::Mat& im,
+                                   const MattingResult& result,
+                                   bool remove_small_connected_area = false);
+// Outlines every text box of `ocr_result` in green on a copy of `im`.
+FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result);
+// Composites `im` over `background` using the alpha matte of `result`.
+FASTDEPLOY_DECL cv::Mat SwapBackground(
+    const cv::Mat& im, const cv::Mat& background, const MattingResult& result,
+    bool remove_small_connected_area = false);
+// Replaces every pixel of `im` labelled `background_label` with the pixel of
+// `background` at the same position.
+FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im,
+                                       const cv::Mat& background,
+                                       const SegmentationResult& result,
+                                       int background_label);
+
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/visualize_pybind.cc
+++ b/fastdeploy/vision/visualize/visualize_pybind.cc
@@ -16,6 +16,76 @@

 namespace fastdeploy {
 void BindVisualize(pybind11::module& m) {
+  m.def("vis_detection",
+        [](pybind11::array& im_data, vision::DetectionResult& result,
+           float score_threshold, int line_size, float font_size) {
+          auto im = PyArrayToCvMat(im_data);
+          auto vis_im = vision::VisDetection(im, result, score_threshold,
+                                             line_size, font_size);
+          FDTensor out;
+          vision::Mat(vis_im).ShareWithTensor(&out);
+          return TensorToPyArray(out);
+        })
+      .def("vis_face_detection",
+           [](pybind11::array& im_data, vision::FaceDetectionResult& result,
+              int line_size, float font_size) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im =
+                 vision::VisFaceDetection(im, result, line_size, font_size);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_segmentation",
+           [](pybind11::array& im_data, vision::SegmentationResult& result,
+              float weight) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisSegmentation(im, result, weight);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("swap_background",
+           [](pybind11::array& im_data, pybind11::array& background_data,
+              vision::MattingResult& result, bool remove_small_connected_area) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             cv::Mat background = PyArrayToCvMat(background_data);
+             auto vis_im = vision::SwapBackground(im, background, result,
+                                                  remove_small_connected_area);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("swap_background",
+           [](pybind11::array& im_data, pybind11::array& background_data,
+              vision::SegmentationResult& result, int background_label) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             cv::Mat background = PyArrayToCvMat(background_data);
+             auto vis_im = vision::SwapBackground(im, background, result,
+                                                  background_label);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_ppocr",
+           [](pybind11::array& im_data, vision::OCRResult& result) {
+             auto im = PyArrayToCvMat(im_data);
+             auto vis_im = vision::VisOcr(im, result);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           })
+      .def("vis_matting",
+           [](pybind11::array& im_data, vision::MattingResult& result,
+              bool remove_small_connected_area) {
+             cv::Mat im = PyArrayToCvMat(im_data);
+             auto vis_im =
+                 vision::VisMatting(im, result, remove_small_connected_area);
+             FDTensor out;
+             vision::Mat(vis_im).ShareWithTensor(&out);
+             return TensorToPyArray(out);
+           });
+
  pybind11::class_<vision::Visualize>(m, "Visualize")
      .def(pybind11::init<>())
      .def_static("vis_detection",
--- a/python/fastdeploy/libs/init.py
+++ b/python/fastdeploy/libs/init.py
@@ -0,0 +1 @@
+
--- a/python/fastdeploy/vision/visualize/init.py
+++ b/python/fastdeploy/vision/visualize/init.py
@@ -22,23 +22,31 @@ def vis_detection(im_data,
                  score_threshold=0.0,
                  line_size=1,
                  font_size=0.5):
-    return C.vision.Visualize.vis_detection(
-        im_data, det_result, score_threshold, line_size, font_size)
-
-
-def vis_face_detection(im_data, face_det_result, line_size=1, font_size=0.5):
-    return C.vision.Visualize.vis_face_detection(im_data, face_det_result,
+    return C.vision.vis_detection(im_data, det_result, score_threshold,
                                  line_size, font_size)


+def vis_face_detection(im_data, face_det_result, line_size=1, font_size=0.5):
+    """Draw face detection results on an image.
+
+    Forwards all arguments unchanged to the compiled
+    ``C.vision.vis_face_detection`` binding and returns its result.
+    """
+    vis_im = C.vision.vis_face_detection(im_data, face_det_result, line_size,
+                                         font_size)
+    return vis_im
+
+
 def vis_segmentation(im_data, seg_result):
-    return C.vision.Visualize.vis_segmentation(im_data, seg_result)
+    return C.vision.vis_segmentation(im_data, seg_result)


 def vis_matting_alpha(im_data,
                       matting_result,
                       remove_small_connected_area=False):
+    """Deprecated alias of ``vis_matting``.
+
+    Logs a deprecation warning, then forwards all arguments to the compiled
+    ``C.vision.vis_matting`` binding and returns its result.
+    """
-    return C.vision.Visualize.vis_matting_alpha(im_data, matting_result,
+    logging.warning(
+        "DEPRECATED: fastdeploy.vision.vis_matting_alpha is deprecated, please use fastdeploy.vision.vis_matting function instead."
+    )
+    return C.vision.vis_matting(im_data, matting_result,
+                                remove_small_connected_area)
+
+
+def vis_matting(im_data, matting_result, remove_small_connected_area=False):
+    """Visualize a matting result on an image.
+
+    Forwards all arguments unchanged to the compiled ``C.vision.vis_matting``
+    binding and returns its result.
+    """
+    return C.vision.vis_matting(im_data, matting_result,
                                 remove_small_connected_area)


@@ -46,15 +54,19 @@ def swap_background_matting(im_data,
                            background,
                            result,
                            remove_small_connected_area=False):
+    logging.warning(
+        "DEPRECATED: fastdeploy.vision.swap_background_matting is deprecated, please use fastdeploy.vision.swap_background function instead."
+    )
    assert isinstance(
-        result,
-        C.vision.MattingResult), "The result must be MattingResult type"
+        result, C.vision.MattingResult), "The result must be MattingResult type"
    return C.vision.Visualize.swap_background_matting(
        im_data, background, result, remove_small_connected_area)


-def swap_background_segmentation(im_data, background, background_label,
-                                 result):
+def swap_background_segmentation(im_data, background, background_label, result):
+    logging.warning(
+        "DEPRECATED: fastdeploy.vision.swap_background_segmentation is deprecated, please use fastdeploy.vision.swap_background function instead."
+    )
    assert isinstance(
        result, C.vision.
        SegmentationResult), "The result must be SegmentaitonResult type"
@@ -62,9 +74,22 @@ def swap_background_segmentation(im_data, background, background_label,
        im_data, background, background_label, result)


-def remove_small_connected_area(alpha_pred_data, threshold):
-    assert len(alpha_pred_data.shape) == 3, "alpha has a (h, w, 1) shape"
-    return C.vision.Visualize.remove_small_connected_area(alpha_pred_data,
-                                                          threshold)
+def swap_background(im_data,
+                    background,
+                    result,
+                    remove_small_connected_area=False,
+                    background_label=0):
+    """Replace the background of an image using a matting or segmentation result.
+
+    Dispatches on the type of ``result``:
+
+    * ``C.vision.MattingResult``: calls ``C.vision.swap_background`` with
+      ``remove_small_connected_area``; ``background_label`` is ignored.
+    * ``C.vision.SegmentationResult``: calls ``C.vision.swap_background`` with
+      ``background_label``; ``remove_small_connected_area`` is ignored.
+
+    Raises:
+        TypeError: if ``result`` is of neither type. ``TypeError`` is a
+            subclass of ``Exception``, so existing ``except Exception``
+            handlers keep working.
+    """
+    if isinstance(result, C.vision.MattingResult):
+        return C.vision.swap_background(im_data, background, result,
+                                        remove_small_connected_area)
+    if isinstance(result, C.vision.SegmentationResult):
+        return C.vision.swap_background(im_data, background, result,
+                                        background_label)
+    raise TypeError(
+        "Only support result type of MattingResult or SegmentationResult, but now the data type is {}.".
+        format(type(result)))
+
+
 def vis_ppocr(im_data, det_result):
+    """Draw OCR results on an image.
+
+    Forwards both arguments unchanged to the compiled ``C.vision.vis_ppocr``
+    binding and returns its result.
+    """
-    return C.vision.Visualize.vis_ppocr(im_data, det_result)
+    return C.vision.vis_ppocr(im_data, det_result)