[Other] Remove useless macros (#1095)

* Remove useless macros * trigger ci * fix check error * rename INTEGRATE_PADDLE2ONNX to ENABLE_PADDLE2ONNX
2025-10-05 16:48:03 +08:00 · 2023-01-09 21:35:23 +08:00
parent 48bc7241cb
commit df20b2a02b
35 changed files with 510 additions and 559 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,25 +103,8 @@ if(NOT GIT_URL)
    set(GIT_URL "https://github.com")
 endif()

-# Check for 32bit system
-if(WIN32)
-  if(NOT CMAKE_CL_64)
-    message("***********************Compile on non 64-bit system now**********************")
-    add_definitions(-DNON_64_PLATFORM)
-    if(WITH_GPU)
-      message(FATAL_ERROR "-DWITH_GPU=ON doesn't support on non 64-bit system now.")
-    endif()
-    if(ENABLE_PADDLE_BACKEND)
-      message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.")
-    endif()
-    if(ENABLE_POROS_BACKEND)
-    message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.")
-    endif()
-    if(ENABLE_VISION)
-      message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.")
-    endif()
-  endif()
-endif()
+# check build options
+include(${PROJECT_SOURCE_DIR}/cmake/check.cmake)

 if(WIN32 AND ENABLE_VISION)
  add_definitions(-DYAML_CPP_DLL)
@@ -149,30 +132,15 @@ if (WITH_ASCEND)
 endif()

 if (WITH_KUNLUNXIN)
-  if(NOT ENABLE_LITE_BACKEND)
-      set(ENABLE_LITE_BACKEND ON)
-  endif()
-  if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
-    message(FATAL_ERROR "KunlunXin XPU is only supported on Linux x64 platform")
-  endif()
-  if(NOT PADDLELITE_URL)
-    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
-  endif()
+  include(${PROJECT_SOURCE_DIR}/cmake/kunlunxin.cmake)
 endif()

-if(ANDROID OR IOS)
-  if(ENABLE_ORT_BACKEND)
-    message(FATAL_ERROR "Not support ONNXRuntime backend for Andorid/IOS now. Please set ENABLE_ORT_BACKEND=OFF.")
-  endif()
-  if(ENABLE_PADDLE_BACKEND)
-    message(FATAL_ERROR "Not support Paddle backend for Andorid/IOS now. Please set ENABLE_PADDLE_BACKEND=OFF.")
-  endif()
-  if(ENABLE_OPENVINO_BACKEND)
-    message(FATAL_ERROR "Not support OpenVINO backend for Andorid/IOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.")
-  endif()
-  if(ENABLE_TRT_BACKEND)
-    message(FATAL_ERROR "Not support TensorRT backend for Andorid/IOS now. Please set ENABLE_TRT_BACKEND=OFF.")
+if(WITH_IPU)
+  if(NOT ENABLE_PADDLE_BACKEND)
+    message("Will force to set ENABLE_PADDLE_BACKEND when build with GraphCore IPU.")
+    set(ENABLE_PADDLE_BACKEND ON)
  endif()
+  add_definitions(-DWITH_IPU)
 endif()

 # Check for macOS architecture
@@ -208,7 +176,6 @@ set(DEPEND_LIBS "")
 file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION)
 string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION)

-
 # Add eigen lib
 include_directories(${PROJECT_SOURCE_DIR}/third_party/eigen)
 if(WIN32)
@@ -221,7 +188,7 @@ if(WITH_SW)
 endif()

 if(ENABLE_ORT_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  add_definitions(-DENABLE_ORT_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS})
  include(${PROJECT_SOURCE_DIR}/cmake/onnxruntime.cmake)
@@ -236,7 +203,7 @@ if(ENABLE_LITE_BACKEND)
 endif()

 if(ENABLE_PADDLE_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  add_definitions(-DENABLE_PADDLE_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS})
  include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake)
@@ -250,7 +217,7 @@ if(ENABLE_PADDLE_BACKEND)
 endif()

 if(ENABLE_OPENVINO_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  add_definitions(-DENABLE_OPENVINO_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS})
  include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
@@ -313,36 +280,24 @@ if(ENABLE_POROS_BACKEND)
 endif()

 if(WITH_GPU)
-  if(APPLE)
-    message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
-    set(WITH_GPU OFF)
-  elseif(ANDROID OR IOS)
-    message(FATAL_ERROR "Cannot enable GPU while compling in Android or IOS.")
-    set(WITH_GPU OFF)
+  add_definitions(-DWITH_GPU)
+  include_directories(${CUDA_DIRECTORY}/include)
+  if(WIN32)
+    find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64)
  else()
-    add_definitions(-DWITH_GPU)
-    include_directories(${CUDA_DIRECTORY}/include)
-    if(WIN32)
-      find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64)
-    else()
-      find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
-    endif()
-    list(APPEND DEPEND_LIBS ${CUDA_LIB})
-
-    # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc.
-    enable_language(CUDA)
-    message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: "
-                    "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}")
-    include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake)
+    find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
  endif()
-endif()
+  list(APPEND DEPEND_LIBS ${CUDA_LIB})

-if(WITH_IPU)
-  add_definitions(-DWITH_IPU)
+  # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc.
+  enable_language(CUDA)
+  message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: "
+                  "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}")
+  include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake)
 endif()

 if(ENABLE_TRT_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  if(APPLE OR ANDROID OR IOS)
    message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios/android os, please set -DENABLE_TRT_BACKEND=OFF.")
  endif()
@@ -406,7 +361,6 @@ endif()

 if(ENABLE_VISION)
  add_definitions(-DENABLE_VISION)
-  add_definitions(-DENABLE_VISION_VISUALIZE)
  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
  list(APPEND DEPEND_LIBS yaml-cpp)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS})
@@ -435,11 +389,11 @@ if(ENABLE_ENCRYPTION)
  list(APPEND DEPEND_LIBS ${OPENSSL_LIBRARIES})
 endif()

-if(ENABLE_PADDLE_FRONTEND)
-  add_definitions(-DENABLE_PADDLE_FRONTEND)
+if(ENABLE_PADDLE2ONNX)
+  add_definitions(-DENABLE_PADDLE2ONNX)
  include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake)
  list(APPEND DEPEND_LIBS external_paddle2onnx)
-endif(ENABLE_PADDLE_FRONTEND)
+endif(ENABLE_PADDLE2ONNX)


 configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -10,7 +10,7 @@ set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
 set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
 set(POROS_VERSION @POROS_VERSION@)
 set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
-set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
+set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@)
 set(ENABLE_VISION @ENABLE_VISION@)
 set(ENABLE_FLYCV @ENABLE_FLYCV@)
 set(ENABLE_TEXT @ENABLE_TEXT@)
@@ -238,7 +238,7 @@ if (ENABLE_TEXT)
  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include)
 endif()

-if(ENABLE_PADDLE_FRONTEND)
+if(ENABLE_PADDLE2ONNX)
  if(ANDROID)
    message(FATAL_ERROR "Not support fastdeploy-paddle2onnx APIs with Android now!")
  endif()
--- a/cmake/check.cmake
+++ b/cmake/check.cmake
@@ -0,0 +1,44 @@
+# 32-bit Windows check: when CMAKE_CL_64 is not set, define NON_64_PLATFORM and reject GPU, Paddle, Poros and vision builds.
+if(WIN32)
+  if(NOT CMAKE_CL_64)
+    message("***********************Compile on non 64-bit system now**********************")
+    add_definitions(-DNON_64_PLATFORM)
+    if(WITH_GPU)
+      message(FATAL_ERROR "-DWITH_GPU=ON doesn't support on non 64-bit system now.")
+    endif()
+    if(ENABLE_PADDLE_BACKEND)
+      message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.")
+    endif()
+    if(ENABLE_POROS_BACKEND)
+      message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.")
+    endif()
+    if(ENABLE_VISION)
+      message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.")
+    endif()
+  endif()
+endif()
+
+# Mobile targets: the ONNXRuntime, Paddle, OpenVINO and TensorRT backends are not
+# supported on Android/iOS, so abort configuration as soon as one is enabled.
+# message(FATAL_ERROR) stops processing, so only the first enabled backend is reported.
+if(ANDROID OR IOS)
+  if(ENABLE_ORT_BACKEND)
+    message(FATAL_ERROR "Not support ONNXRuntime backend for Android/IOS now. Please set ENABLE_ORT_BACKEND=OFF.")
+  elseif(ENABLE_PADDLE_BACKEND)
+    message(FATAL_ERROR "Not support Paddle backend for Android/IOS now. Please set ENABLE_PADDLE_BACKEND=OFF.")
+  elseif(ENABLE_OPENVINO_BACKEND)
+    message(FATAL_ERROR "Not support OpenVINO backend for Android/IOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.")
+  elseif(ENABLE_TRT_BACKEND)
+    message(FATAL_ERROR "Not support TensorRT backend for Android/IOS now. Please set ENABLE_TRT_BACKEND=OFF.")
+  endif()
+endif()
+
+# Reject GPU builds on Apple, Android and iOS targets.
+# message(FATAL_ERROR) aborts configuration, so WITH_GPU is not reset afterwards.
+if(WITH_GPU)
+  if(APPLE)
+    message(FATAL_ERROR "Cannot enable GPU while compiling in Mac OSX.")
+  elseif(ANDROID OR IOS)
+    message(FATAL_ERROR "Cannot enable GPU while compiling in Android or IOS.")
+  endif()
+endif()
--- a/cmake/kunlunxin.cmake
+++ b/cmake/kunlunxin.cmake
@@ -0,0 +1,12 @@
+if(NOT ENABLE_LITE_BACKEND)
+  message("Will force to set ENABLE_LITE_BACKEND when build with KunlunXin.")
+  set(ENABLE_LITE_BACKEND ON)
+endif()
+
+if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
+  message(FATAL_ERROR "KunlunXin XPU is only supported on Linux x64 platform")
+endif()
+
+if(NOT PADDLELITE_URL)
+  set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
+endif()
--- a/cmake/paddle_inference.cmake
+++ b/cmake/paddle_inference.cmake
@@ -13,6 +13,10 @@
 # limitations under the License.
 include(ExternalProject)

+if(WITH_GPU AND WITH_IPU)
+  message(FATAL_ERROR "Cannot build with WITH_GPU=ON and WITH_IPU=ON on the same time.")
+endif()
+
 option(PADDLEINFERENCE_DIRECTORY "Directory of Paddle Inference library" OFF)

 set(PADDLEINFERENCE_PROJECT "extern_paddle_inference")
--- a/cmake/timvx.cmake
+++ b/cmake/timvx.cmake
@@ -3,9 +3,9 @@ if(NOT ${ENABLE_LITE_BACKEND})
    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_LITE_BACKEND=ON")
    set(ENABLE_LITE_BACKEND ON)
 endif()
-if(${ENABLE_PADDLE_FRONTEND})
-    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE_FRONTEND=OFF")
-    set(ENABLE_PADDLE_FRONTEND OFF)
+if(${ENABLE_PADDLE2ONNX})
+    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE2ONNX=OFF")
+    set(ENABLE_PADDLE2ONNX OFF)
 endif()
 if(${ENABLE_ORT_BACKEND})
    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_ORT_BACKEND=OFF")
--- a/docs/api_docs/cpp/Doxyfile
+++ b/docs/api_docs/cpp/Doxyfile
@@ -2100,7 +2100,7 @@ INCLUDE_FILE_PATTERNS  =
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.

-PREDEFINED = protected=private ENABLE_VISION_VISUALIZE=1
+PREDEFINED = protected=private

 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
--- a/fastdeploy/core/config.h.in
+++ b/fastdeploy/core/config.h.in
@@ -17,8 +17,8 @@
 #cmakedefine FASTDEPLOY_LIB
 #endif

-#ifndef ENABLE_PADDLE_FRONTEND
-#cmakedefine ENABLE_PADDLE_FRONTEND
+#ifndef ENABLE_PADDLE2ONNX
+#cmakedefine ENABLE_PADDLE2ONNX
 #endif

 #ifndef ENABLE_ORT_BACKEND
@@ -56,13 +56,3 @@
 #ifndef ENABLE_TEXT
 #cmakedefine ENABLE_TEXT
 #endif
-
-#ifdef ENABLE_VISION
-#ifndef ENABLE_VISION_VISUALIZE
-#define ENABLE_VISION_VISUALIZE
-#endif
-#endif
-
-#ifndef ENABLE_FDTENSOR_FUNC
-#cmakedefine ENABLE_FDTENSOR_FUNC
-#endif
--- a/fastdeploy/runtime/backends/openvino/ov_backend.cc
+++ b/fastdeploy/runtime/backends/openvino/ov_backend.cc
@@ -13,7 +13,7 @@
 // limitations under the License.

 #include "fastdeploy/runtime/backends/openvino/ov_backend.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif

--- a/fastdeploy/runtime/backends/ort/ort_backend.cc
+++ b/fastdeploy/runtime/backends/ort/ort_backend.cc
@@ -21,7 +21,7 @@
 #include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
 #include "fastdeploy/runtime/backends/ort/utils.h"
 #include "fastdeploy/utils/utils.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif

@@ -84,7 +84,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
  char* model_content_ptr;
  int model_content_size = 0;
  bool save_external = false;
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
  std::vector<paddle2onnx::CustomOp> ops;
  ops.resize(2);
  strcpy(ops[0].op_name, "multiclass_nms3");
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.h
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h
@@ -21,7 +21,7 @@

 #include "fastdeploy/runtime/backends/backend.h"
 #include "fastdeploy/runtime/backends/paddle/option.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif
 #include "fastdeploy/utils/unique_ptr.h"
--- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
+++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
@@ -20,7 +20,7 @@
 #include "NvInferRuntime.h"
 #include "fastdeploy/function/cuda_cast.h"
 #include "fastdeploy/utils/utils.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif

@@ -123,7 +123,7 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
  }
  option_ = option;

-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
  std::vector<paddle2onnx::CustomOp> ops;
  ops.resize(1);
  strcpy(ops[0].op_name, "pool2d");
--- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc
+++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc
@@ -1,7 +1,8 @@
 #include "fastdeploy/vision/keypointdet/pptinypose/pptinypose.h"
+
 #include "fastdeploy/vision/utils/utils.h"
 #include "yaml-cpp/yaml.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif
 #include "fastdeploy/vision.h"
@@ -16,7 +17,8 @@ PPTinyPose::PPTinyPose(const std::string& model_file,
                       const RuntimeOption& custom_option,
                       const ModelFormat& model_format) {
  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
+  valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
+                        Backend::LITE};
  valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
  valid_kunlunxin_backends = {Backend::LITE};
  runtime_option = custom_option;
@@ -100,11 +102,11 @@ bool PPTinyPose::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
      int resize_height = -1;
      std::tie(resize_width, resize_height) = processor->GetWidthAndHeight();
      cv::Mat trans_matrix(2, 3, CV_64FC1);
-      GetAffineTransform(center, scale, 0, {resize_width, resize_height}, &trans_matrix, 0);
+      GetAffineTransform(center, scale, 0, {resize_width, resize_height},
+                         &trans_matrix, 0);
      if (!(processor->SetTransformMatrix(trans_matrix))) {
        FDERROR << "Failed to set transform matrix of "
-                << processors_[i]->Name()
-                << " processor." << std::endl;
+                << processors_[i]->Name() << " processor." << std::endl;
      }
    }
    if (!(*(processors_[i].get()))(mat)) {
@@ -160,7 +162,9 @@ bool PPTinyPose::Postprocess(std::vector<FDTensor>& infer_result,
    std::copy(static_cast<int64_t*>(idx_data),
              static_cast<int64_t*>(idx_data) + idxdata_size, idxout.begin());
  } else {
-    FDERROR << "Only support process inference result with INT32/INT64 data type, but now it's " << idx_dtype << "." << std::endl;
+    FDERROR << "Only support process inference result with INT32/INT64 data "
+               "type, but now it's "
+            << idx_dtype << "." << std::endl;
  }
  GetFinalPredictions(heatmap, out_data_shape, idxout, center, scale, &preds,
                      this->use_dark);
@@ -176,7 +180,8 @@ bool PPTinyPose::Postprocess(std::vector<FDTensor>& infer_result,

 bool PPTinyPose::Predict(cv::Mat* im, KeyPointDetectionResult* result) {
  std::vector<float> center = {round(im->cols / 2.0f), round(im->rows / 2.0f)};
-  std::vector<float> scale = {static_cast<float>(im->cols), static_cast<float>(im->rows)};
+  std::vector<float> scale = {static_cast<float>(im->cols),
+                              static_cast<float>(im->rows)};
  Mat mat(*im);
  std::vector<FDTensor> processed_data;
  if (!Preprocess(&mat, &processed_data)) {
--- a/fastdeploy/vision/vision_pybind.cc
+++ b/fastdeploy/vision/vision_pybind.cc
@@ -29,9 +29,7 @@ void BindKeyPointDetection(pybind11::module& m);
 void BindHeadPose(pybind11::module& m);
 void BindSR(pybind11::module& m);
 void BindGeneration(pybind11::module& m);
-#ifdef ENABLE_VISION_VISUALIZE
 void BindVisualize(pybind11::module& m);
-#endif

 void BindVision(pybind11::module& m) {
  pybind11::class_<vision::Mask>(m, "Mask")
@@ -39,20 +37,20 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("data", &vision::Mask::data)
      .def_readwrite("shape", &vision::Mask::shape)
      .def(pybind11::pickle(
-        [](const vision::Mask &m) { 
+          [](const vision::Mask& m) {
            return pybind11::make_tuple(m.data, m.shape);
-        },
-        [](pybind11::tuple t) { 
+          },
+          [](pybind11::tuple t) {
            if (t.size() != 2)
-                throw std::runtime_error("vision::Mask pickle with invalid state!");
+              throw std::runtime_error(
+                  "vision::Mask pickle with invalid state!");

            vision::Mask m;
            m.data = t[0].cast<std::vector<int32_t>>();
            m.shape = t[1].cast<std::vector<int64_t>>();

            return m;
-        }
-      ))
+          }))
      .def("__repr__", &vision::Mask::Str)
      .def("__str__", &vision::Mask::Str);

@@ -61,20 +59,20 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("label_ids", &vision::ClassifyResult::label_ids)
      .def_readwrite("scores", &vision::ClassifyResult::scores)
      .def(pybind11::pickle(
-        [](const vision::ClassifyResult &c) { 
+          [](const vision::ClassifyResult& c) {
            return pybind11::make_tuple(c.label_ids, c.scores);
-        },
-        [](pybind11::tuple t) { 
+          },
+          [](pybind11::tuple t) {
            if (t.size() != 2)
-                throw std::runtime_error("vision::ClassifyResult pickle with invalid state!");
+              throw std::runtime_error(
+                  "vision::ClassifyResult pickle with invalid state!");

            vision::ClassifyResult c;
            c.label_ids = t[0].cast<std::vector<int32_t>>();
            c.scores = t[1].cast<std::vector<float>>();

            return c;
-        }
-      ))
+          }))
      .def("__repr__", &vision::ClassifyResult::Str)
      .def("__str__", &vision::ClassifyResult::Str);

@@ -86,12 +84,14 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("masks", &vision::DetectionResult::masks)
      .def_readwrite("contain_masks", &vision::DetectionResult::contain_masks)
      .def(pybind11::pickle(
-        [](const vision::DetectionResult &d) { 
-            return pybind11::make_tuple(d.boxes, d.scores, d.label_ids, d.masks, d.contain_masks);
-        },
-        [](pybind11::tuple t) { 
+          [](const vision::DetectionResult& d) {
+            return pybind11::make_tuple(d.boxes, d.scores, d.label_ids, d.masks,
+                                        d.contain_masks);
+          },
+          [](pybind11::tuple t) {
            if (t.size() != 5)
-                throw std::runtime_error("vision::DetectionResult pickle with Invalid state!");
+              throw std::runtime_error(
+                  "vision::DetectionResult pickle with Invalid state!");

            vision::DetectionResult d;
            d.boxes = t[0].cast<std::vector<std::array<float, 4>>>();
@@ -101,8 +101,7 @@ void BindVision(pybind11::module& m) {
            d.contain_masks = t[4].cast<bool>();

            return d;
-        }
-      ))
+          }))
      .def("__repr__", &vision::DetectionResult::Str)
      .def("__str__", &vision::DetectionResult::Str);

@@ -152,14 +151,17 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("label_map", &vision::SegmentationResult::label_map)
      .def_readwrite("score_map", &vision::SegmentationResult::score_map)
      .def_readwrite("shape", &vision::SegmentationResult::shape)
-      .def_readwrite("contain_score_map", &vision::SegmentationResult::contain_score_map)
+      .def_readwrite("contain_score_map",
+                     &vision::SegmentationResult::contain_score_map)
      .def(pybind11::pickle(
-        [](const vision::SegmentationResult &s) { 
-            return pybind11::make_tuple(s.label_map, s.score_map, s.shape, s.contain_score_map);
-        },
-        [](pybind11::tuple t) { 
+          [](const vision::SegmentationResult& s) {
+            return pybind11::make_tuple(s.label_map, s.score_map, s.shape,
+                                        s.contain_score_map);
+          },
+          [](pybind11::tuple t) {
            if (t.size() != 4)
-                throw std::runtime_error("vision::SegmentationResult pickle with Invalid state!");
+              throw std::runtime_error(
+                  "vision::SegmentationResult pickle with Invalid state!");

            vision::SegmentationResult s;
            s.label_map = t[0].cast<std::vector<uint8_t>>();
@@ -168,8 +170,7 @@ void BindVision(pybind11::module& m) {
            s.contain_score_map = t[3].cast<bool>();

            return s;
-        }
-      ))
+          }))
      .def("__repr__", &vision::SegmentationResult::Str)
      .def("__str__", &vision::SegmentationResult::Str);

@@ -178,7 +179,8 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("alpha", &vision::MattingResult::alpha)
      .def_readwrite("foreground", &vision::MattingResult::foreground)
      .def_readwrite("shape", &vision::MattingResult::shape)
-      .def_readwrite("contain_foreground", &vision::MattingResult::contain_foreground)
+      .def_readwrite("contain_foreground",
+                     &vision::MattingResult::contain_foreground)
      .def("__repr__", &vision::MattingResult::Str)
      .def("__str__", &vision::MattingResult::Str);

@@ -215,8 +217,6 @@ void BindVision(pybind11::module& m) {
  BindHeadPose(m);
  BindSR(m);
  BindGeneration(m);
-#ifdef ENABLE_VISION_VISUALIZE
  BindVisualize(m);
-#endif
 }
 }  // namespace fastdeploy
--- a/fastdeploy/vision/visualize/classification.cc
+++ b/fastdeploy/vision/visualize/classification.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include <algorithm>

 #include "fastdeploy/vision/visualize/visualize.h"
@@ -54,9 +52,8 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,

 // Visualize ClassifyResult with custom labels.
 cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
-                          const std::vector<std::string>& labels,
-                          int top_k, float score_threshold,
-                          float font_size) {
+                          const std::vector<std::string>& labels, int top_k,
+                          float score_threshold, float font_size) {
  int h = im.rows;
  int w = im.cols;
  auto vis_im = im.clone();
@@ -97,4 +94,3 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/detection.cc
+++ b/fastdeploy/vision/visualize/detection.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include <algorithm>

 #include "fastdeploy/vision/visualize/visualize.h"
@@ -107,8 +105,7 @@ cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
 // Visualize DetectionResult with custom labels.
 cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
                     const std::vector<std::string>& labels,
-                     float score_threshold, int line_size,
-                     float font_size) {
+                     float score_threshold, int line_size, float font_size) {
  if (result.contain_masks) {
    FDASSERT(result.boxes.size() == result.masks.size(),
             "The size of masks must be equal to the size of boxes, but now "
@@ -287,4 +284,3 @@ cv::Mat Visualize::VisDetection(const cv::Mat& im,

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/face_alignment.cc
+++ b/fastdeploy/vision/visualize/face_alignment.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"

@@ -28,10 +26,8 @@ cv::Mat VisFaceAlignment(const cv::Mat& im, const FaceAlignmentResult& result,
  cv::Scalar landmark_color = cv::Scalar(0, 255, 0);
  for (size_t i = 0; i < result.landmarks.size(); ++i) {
    cv::Point landmark;
-    landmark.x = static_cast<int>(
-        result.landmarks[i][0]);
-    landmark.y = static_cast<int>(
-        result.landmarks[i][1]);
+    landmark.x = static_cast<int>(result.landmarks[i][0]);
+    landmark.y = static_cast<int>(result.landmarks[i][1]);
    cv::circle(vis_im, landmark, line_size, landmark_color, -1);
  }
  return vis_im;
@@ -39,5 +35,3 @@ cv::Mat VisFaceAlignment(const cv::Mat& im, const FaceAlignmentResult& result,

 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
--- a/fastdeploy/vision/visualize/face_detection.cc
+++ b/fastdeploy/vision/visualize/face_detection.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"

@@ -137,5 +135,3 @@ cv::Mat Visualize::VisFaceDetection(const cv::Mat& im,

 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
--- a/fastdeploy/vision/visualize/headpose.cc
+++ b/fastdeploy/vision/visualize/headpose.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"

@@ -21,8 +19,8 @@ namespace fastdeploy {

 namespace vision {

-cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result,
-                    int size, int line_size) {
+cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result, int size,
+                    int line_size) {
  const float PI = 3.1415926535;
  auto vis_im = im.clone();
  int h = im.rows;
@@ -37,23 +35,28 @@ cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result,

  // X-Axis | drawn in red
  int x1 = static_cast<int>(size * std::cos(yaw) * std::cos(roll)) + tdx;
-  int y1 = static_cast<int>(size * (std::cos(pitch) * std::sin(roll) + 
-                            std::cos(roll) * std::sin(pitch) * std::sin(yaw))) + tdy;
+  int y1 = static_cast<int>(
+               size * (std::cos(pitch) * std::sin(roll) +
+                       std::cos(roll) * std::sin(pitch) * std::sin(yaw))) +
+           tdy;
  // Y-Axis | drawn in green
  int x2 = static_cast<int>(-size * std::cos(yaw) * std::sin(roll)) + tdx;
-  int y2 = static_cast<int>(size * (std::cos(pitch) * std::cos(roll) - 
-                            std::sin(pitch) * std::sin(yaw) * std::sin(roll))) + tdy;
+  int y2 = static_cast<int>(
+               size * (std::cos(pitch) * std::cos(roll) -
+                       std::sin(pitch) * std::sin(yaw) * std::sin(roll))) +
+           tdy;
  // Z-Axis | drawn in blue
  int x3 = static_cast<int>(size * std::sin(yaw)) + tdx;
  int y3 = static_cast<int>(-size * std::cos(yaw) * std::sin(pitch)) + tdy;

-  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1), cv::Scalar(0, 0, 255), line_size);
-  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x2, y2), cv::Scalar(0, 255, 0), line_size);
-  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3), cv::Scalar(255, 0, 0), line_size);
+  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1),
+           cv::Scalar(0, 0, 255), line_size);
+  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x2, y2),
+           cv::Scalar(0, 255, 0), line_size);
+  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3),
+           cv::Scalar(255, 0, 0), line_size);
  return vis_im;
 }

 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
--- a/fastdeploy/vision/visualize/keypoint.cc
+++ b/fastdeploy/vision/visualize/keypoint.cc
@@ -11,9 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/visualize.h"

 namespace fastdeploy {
@@ -29,7 +26,7 @@ cv::Mat VisKeypointDetection(const cv::Mat& im,
  auto colormap = GenerateColorMap();
  cv::Mat vis_img = im.clone();
  int detection_nums = results.keypoints.size() / 17;
-  for (int i = 0; i < detection_nums; i++){
+  for (int i = 0; i < detection_nums; i++) {
    int index = i * 17;
    bool is_over_threshold = true;
    for (int j = 0; j < results.num_joints; j++) {
@@ -43,20 +40,18 @@ cv::Mat VisKeypointDetection(const cv::Mat& im,
        int x_coord = int(results.keypoints[index + k][0]);
        int y_coord = int(results.keypoints[index + k][1]);
        cv::circle(vis_img, cv::Point2d(x_coord, y_coord), 1,
-                  cv::Scalar(0, 0, 255), 2);
+                   cv::Scalar(0, 0, 255), 2);
        int x_start = int(results.keypoints[index + edge[k][0]][0]);
        int y_start = int(results.keypoints[index + edge[k][0]][1]);
        int x_end = int(results.keypoints[index + edge[k][1]][0]);
        int y_end = int(results.keypoints[index + edge[k][1]][1]);
-        cv::line(vis_img, cv::Point2d(x_start, y_start), cv::Point2d(x_end, y_end),
-                colormap[k], 1);
+        cv::line(vis_img, cv::Point2d(x_start, y_start),
+                 cv::Point2d(x_end, y_end), colormap[k], 1);
      }
    }
-    
  }
  return vis_img;
 }

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/matting.cc
+++ b/fastdeploy/vision/visualize/matting.cc
@@ -11,9 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -120,4 +117,3 @@ cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im,

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/mot.cc
+++ b/fastdeploy/vision/visualize/mot.cc
@@ -12,21 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-#include "fastdeploy/vision/visualize/visualize.h"
 #include <iomanip>

+#include "fastdeploy/vision/visualize/visualize.h"
+
 namespace fastdeploy {
 namespace vision {

 cv::Scalar GetMOTBoxColor(int idx) {
  idx = idx * 3;
-  cv::Scalar color = cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255);
+  cv::Scalar color =
+      cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255);
  return color;
 }

 cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results,
-               float score_threshold, tracking::TrailRecorder* recorder) {
+               float score_threshold, tracking::TrailRecorder *recorder) {
  cv::Mat vis_img = img.clone();
  int im_h = img.rows;
  int im_w = img.cols;
@@ -35,56 +36,44 @@ cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results,
  float line_thickness = std::max(1, static_cast<int>(im_w / 500.));
  for (int i = 0; i < results.boxes.size(); ++i) {
    if (results.scores[i] < score_threshold) {
-        continue;
+      continue;
    }
    const int obj_id = results.ids[i];
    const float score = results.scores[i];
    cv::Scalar color = GetMOTBoxColor(obj_id);
-    if (recorder != nullptr){
+    if (recorder != nullptr) {
      int id = results.ids[i];
      auto iter = recorder->records.find(id);
      if (iter != recorder->records.end()) {
        for (int j = 0; j < iter->second.size(); j++) {
-            cv::Point center(iter->second[j][0], iter->second[j][1]);
-            cv::circle(vis_img, center, text_thickness, color);
+          cv::Point center(iter->second[j][0], iter->second[j][1]);
+          cv::circle(vis_img, center, text_thickness, color);
        }
      }
    }
    cv::Point pt1 = cv::Point(results.boxes[i][0], results.boxes[i][1]);
    cv::Point pt2 = cv::Point(results.boxes[i][2], results.boxes[i][3]);
-    cv::Point id_pt =
-            cv::Point(results.boxes[i][0], results.boxes[i][1] + 10);
+    cv::Point id_pt = cv::Point(results.boxes[i][0], results.boxes[i][1] + 10);
    cv::Point score_pt =
-            cv::Point(results.boxes[i][0], results.boxes[i][1] - 10);
+        cv::Point(results.boxes[i][0], results.boxes[i][1] - 10);
    cv::rectangle(vis_img, pt1, pt2, color, line_thickness);
    std::ostringstream idoss;
    idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
    idoss << obj_id;
    std::string id_text = idoss.str();

-    cv::putText(vis_img,
-                id_text,
-                id_pt,
-                cv::FONT_HERSHEY_PLAIN,
-                text_scale,
-                color,
-                text_thickness);
+    cv::putText(vis_img, id_text, id_pt, cv::FONT_HERSHEY_PLAIN, text_scale,
+                color, text_thickness);

    std::ostringstream soss;
    soss << std::setiosflags(std::ios::fixed) << std::setprecision(2);
    soss << score;
    std::string score_text = soss.str();

-    cv::putText(vis_img,
-                score_text,
-                score_pt,
-                cv::FONT_HERSHEY_PLAIN,
-                text_scale,
-                color,
-                text_thickness);
+    cv::putText(vis_img, score_text, score_pt, cv::FONT_HERSHEY_PLAIN,
+                text_scale, color, text_thickness);
  }
  return vis_img;
 }
-}// namespace vision
-} //namespace fastdepoly
-#endif
+}  // namespace vision
+}  // namespace fastdeploy
--- a/fastdeploy/vision/visualize/ocr.cc
+++ b/fastdeploy/vision/visualize/ocr.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/visualize.h"

 namespace fastdeploy {
@@ -63,4 +61,3 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/remove_small_connnected_area.cc
+++ b/fastdeploy/vision/visualize/remove_small_connnected_area.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -110,4 +108,3 @@ cv::Mat Visualize::RemoveSmallConnectedArea(const cv::Mat& alpha_pred,

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/segmentation.cc
+++ b/fastdeploy/vision/visualize/segmentation.cc
@@ -12,19 +12,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
-#include "fastdeploy/vision/visualize/visualize.h"
 #include "fastdeploy/vision/visualize/segmentation_arm.h"
+#include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"

 namespace fastdeploy {
 namespace vision {

-static cv::Mat VisSegmentationCommonCpu(
-  const cv::Mat& im, const SegmentationResult& result,
-  float weight) {
+static cv::Mat VisSegmentationCommonCpu(const cv::Mat& im,
+                                        const SegmentationResult& result,
+                                        float weight) {
  // Use the native c++ version without any optimization.
  auto color_map = GenerateColorMap(1000);
  int64_t height = result.shape[0];
@@ -75,4 +73,3 @@ cv::Mat Visualize::VisSegmentation(const cv::Mat& im,

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/segmentation_arm.cc
+++ b/fastdeploy/vision/visualize/segmentation_arm.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
 #include "fastdeploy/vision/visualize/segmentation_arm.h"
 #ifdef __ARM_NEON
 #include <arm_neon.h>
@@ -24,8 +22,9 @@ namespace vision {

 static constexpr int _OMP_THREADS = 2;

-static inline void QuantizeBlendingWeight8(
-  float weight, uint8_t* old_multi_factor, uint8_t* new_multi_factor) {
+static inline void QuantizeBlendingWeight8(float weight,
+                                           uint8_t* old_multi_factor,
+                                           uint8_t* new_multi_factor) {
  // Quantize the weight to boost blending performance.
  // if 0.0 < w <= 1/8, w ~ 1/8=1/(2^3) shift right 3 mul 1, 7
  // if 1/8 < w <= 2/8, w ~ 2/8=1/(2^3) shift right 3 mul 2, 6
@@ -39,27 +38,27 @@ static inline void QuantizeBlendingWeight8(
  *old_multi_factor = (8 - weight_quantize);
 }

-cv::Mat VisSegmentationNEON(
-  const cv::Mat& im, const SegmentationResult& result,
-  float weight, bool quantize_weight) {
+cv::Mat VisSegmentationNEON(const cv::Mat& im, const SegmentationResult& result,
+                            float weight, bool quantize_weight) {
 #ifndef __ARM_NEON
-   FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
 #else
  int64_t height = result.shape[0];
  int64_t width = result.shape[1];
  auto vis_img = cv::Mat(height, width, CV_8UC3);

  int32_t size = static_cast<int32_t>(height * width);
-  uint8_t *vis_ptr = static_cast<uint8_t*>(vis_img.data);
-  const uint8_t *label_ptr = static_cast<const uint8_t*>(result.label_map.data());
-  const uint8_t *im_ptr = static_cast<const uint8_t*>(im.data);
+  uint8_t* vis_ptr = static_cast<uint8_t*>(vis_img.data);
+  const uint8_t* label_ptr =
+      static_cast<const uint8_t*>(result.label_map.data());
+  const uint8_t* im_ptr = static_cast<const uint8_t*>(im.data);

  if (!quantize_weight) {
    uint8x16_t zerox16 = vdupq_n_u8(0);
-    #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
    for (int i = 0; i < size - 15; i += 16) {
      uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3);  // 48 bytes
-      uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
+      uint8x16_t labelx16 = vld1q_u8(label_ptr + i);     // 16 bytes
      uint8x16_t ibx16 = bgrx16x3.val[0];
      uint8x16_t igx16 = bgrx16x3.val[1];
      uint8x16_t irx16 = bgrx16x3.val[2];
@@ -91,17 +90,16 @@ cv::Mat VisSegmentationNEON(
  // to blend the colors from input im and mask. Please
  // check QuantizeBlendingWeight8 for more details.
  uint8_t old_multi_factor, new_multi_factor;
-  QuantizeBlendingWeight8(weight, &old_multi_factor,
-                          &new_multi_factor);     
+  QuantizeBlendingWeight8(weight, &old_multi_factor, &new_multi_factor);
  if (new_multi_factor == 0) {
-    return im; // Only keep origin image.
+    return im;  // Only keep origin image.
  }

  if (new_multi_factor == 8) {
-    // Only keep mask, no need to blending with origin image.
-    #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+// Only keep mask, no need to blending with origin image.
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
    for (int i = 0; i < size - 15; i += 16) {
-      uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
+      uint8x16_t labelx16 = vld1q_u8(label_ptr + i);  // 16 bytes
      // e.g 0b00000001 << 7 -> 0b10000000 128;
      uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
      uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
@@ -124,11 +122,11 @@ cv::Mat VisSegmentationNEON(
  uint8x16_t zerox16 = vdupq_n_u8(0);
  uint8x16_t old_fx16 = vdupq_n_u8(old_multi_factor);
  uint8x16_t new_fx16 = vdupq_n_u8(new_multi_factor);
-  // Blend the two colors together with quantize 'weight'.
-  #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+// Blend the two colors together with quantize 'weight'.
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
  for (int i = 0; i < size - 15; i += 16) {
    uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3);  // 48 bytes
-    uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
+    uint8x16_t labelx16 = vld1q_u8(label_ptr + i);     // 16 bytes
    uint8x16_t ibx16 = bgrx16x3.val[0];
    uint8x16_t igx16 = bgrx16x3.val[1];
    uint8x16_t irx16 = bgrx16x3.val[2];
@@ -164,12 +162,12 @@ cv::Mat VisSegmentationNEON(
  }
  for (int i = size - 15; i < size; i++) {
    uint8_t label = label_ptr[i];
-    vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor 
-      + ((label << 7) >> 3) * new_multi_factor; 
-    vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor 
-      + ((label << 4) >> 3) * new_multi_factor; 
-    vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor 
-      + ((label << 3) >> 3) * new_multi_factor;   
+    vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor +
+                         ((label << 7) >> 3) * new_multi_factor;
+    vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor +
+                         ((label << 4) >> 3) * new_multi_factor;
+    vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor +
+                         ((label << 3) >> 3) * new_multi_factor;
  }
  return vis_img;
 #endif
@@ -177,5 +175,3 @@ cv::Mat VisSegmentationNEON(

 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
--- a/fastdeploy/vision/visualize/segmentation_arm.h
+++ b/fastdeploy/vision/visualize/segmentation_arm.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
 #pragma once

 #include "fastdeploy/vision/common/result.h"
@@ -26,6 +25,3 @@ cv::Mat VisSegmentationNEON(const cv::Mat& im, const SegmentationResult& result,

 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
-
--- a/fastdeploy/vision/visualize/swap_background.cc
+++ b/fastdeploy/vision/visualize/swap_background.cc
@@ -12,20 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-
-#include "fastdeploy/vision/visualize/visualize.h"
+#include "fastdeploy/utils/utils.h"
 #include "fastdeploy/vision/visualize/swap_background_arm.h"
+#include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
-#include "fastdeploy/utils/utils.h"

 namespace fastdeploy {
 namespace vision {

-static cv::Mat SwapBackgroundCommonCpu(
-  const cv::Mat& im, const cv::Mat& background,
-  const MattingResult& result, bool remove_small_connected_area) {
+static cv::Mat SwapBackgroundCommonCpu(const cv::Mat& im,
+                                       const cv::Mat& background,
+                                       const MattingResult& result,
+                                       bool remove_small_connected_area) {
  FDASSERT((!im.empty()), "Image can't be empty!");
  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
  FDASSERT((!background.empty()), "Background image can't be empty!");
@@ -77,9 +76,10 @@ static cv::Mat SwapBackgroundCommonCpu(
  return vis_img;
 }

-static cv::Mat SwapBackgroundCommonCpu(
-  const cv::Mat& im, const cv::Mat& background,
-  const SegmentationResult& result, int background_label) {
+static cv::Mat SwapBackgroundCommonCpu(const cv::Mat& im,
+                                       const cv::Mat& background,
+                                       const SegmentationResult& result,
+                                       int background_label) {
  FDASSERT((!im.empty()), "Image can't be empty!");
  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
  FDASSERT((!background.empty()), "Background image can't be empty!");
@@ -178,4 +178,3 @@ cv::Mat Visualize::SwapBackgroundSegmentation(

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/swap_background_arm.cc
+++ b/fastdeploy/vision/visualize/swap_background_arm.cc
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
-#include "fastdeploy/vision/visualize/visualize.h"
 #include "fastdeploy/vision/visualize/swap_background_arm.h"
+
+#include "fastdeploy/vision/visualize/visualize.h"
 #ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif
@@ -25,209 +25,214 @@ namespace vision {

 static constexpr int _OMP_THREADS = 2;

-cv::Mat SwapBackgroundNEON(const cv::Mat& im, 
-                           const cv::Mat& background, 
+cv::Mat SwapBackgroundNEON(const cv::Mat& im, const cv::Mat& background,
                           const MattingResult& result,
                           bool remove_small_connected_area) {
 #ifndef __ARM_NEON
-   FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
 #else
-   FDASSERT((!im.empty()), "Image can't be empty!");
-   FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
-   FDASSERT((!background.empty()), "Background image can't be empty!");
-   FDASSERT((background.channels() == 3),
-            "Only support 3 channels background image mat!");
-   int out_h = static_cast<int>(result.shape[0]);
-   int out_w = static_cast<int>(result.shape[1]);
-   int height = im.rows;
-   int width = im.cols;
-   int bg_height = background.rows;
-   int bg_width = background.cols;
+  FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((background.channels() == 3),
+           "Only support 3 channels background image mat!");
+  int out_h = static_cast<int>(result.shape[0]);
+  int out_w = static_cast<int>(result.shape[1]);
+  int height = im.rows;
+  int width = im.cols;
+  int bg_height = background.rows;
+  int bg_width = background.cols;

-   // WARN: may change the original alpha
-   float* alpha_ptr = const_cast<float*>(result.alpha.data());
+  // WARN: may change the original alpha
+  float* alpha_ptr = const_cast<float*>(result.alpha.data());

-   cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
-   if (remove_small_connected_area) {
-      alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f);
-   }
-   auto vis_img = cv::Mat(height, width, CV_8UC3);  
+  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
+  if (remove_small_connected_area) {
+    alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f);
+  }
+  auto vis_img = cv::Mat(height, width, CV_8UC3);

-   cv::Mat background_ref;
-   if ((bg_height != height) || (bg_width != width)) {
-      cv::resize(background, background_ref, cv::Size(width, height));
-   } else {
-      background_ref = background; // ref only
-   }
-   if ((background_ref).type() != CV_8UC3) {
-      (background_ref).convertTo((background_ref), CV_8UC3);
-   }
+  cv::Mat background_ref;
+  if ((bg_height != height) || (bg_width != width)) {
+    cv::resize(background, background_ref, cv::Size(width, height));
+  } else {
+    background_ref = background;  // ref only
+  }
+  if ((background_ref).type() != CV_8UC3) {
+    (background_ref).convertTo((background_ref), CV_8UC3);
+  }

-   if ((out_h != height) || (out_w != width)) {
-      cv::resize(alpha, alpha, cv::Size(width, height));
-   }
+  if ((out_h != height) || (out_w != width)) {
+    cv::resize(alpha, alpha, cv::Size(width, height));
+  }

-   uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
-   const uint8_t* background_data = static_cast<const uint8_t*>(background_ref.data);
-   const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
-   const float* alpha_data = reinterpret_cast<const float*>(alpha.data);
+  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
+  const uint8_t* background_data =
+      static_cast<const uint8_t*>(background_ref.data);
+  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
+  const float* alpha_data = reinterpret_cast<const float*>(alpha.data);

-   const int32_t size = static_cast<int32_t>(height * width);
-   #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
-   for(int i = 0; i < size - 7; i += 8) {
-      uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3);  // 24 bytes
-      // u8 -> u16 -> u32 -> f32
-      uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]);
-      uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]);
-      uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]);
-      uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3);  // 24 bytes
-      uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]);
-      uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]);
-      uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]);
+  const int32_t size = static_cast<int32_t>(height * width);
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+  for (int i = 0; i < size - 7; i += 8) {
+    uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3);  // 24 bytes
+    // u8 -> u16 -> u32 -> f32
+    uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]);
+    uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]);
+    uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]);
+    uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3);  // 24 bytes
+    uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]);
+    uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]);
+    uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]);

-      uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8));
-      uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8));
-      uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8));
-      uint32x4_t libx4 = vmovl_u16(vget_low_u16(ibx8));
-      uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8));
-      uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8));
+    uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8));
+    uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8));
+    uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8));
+    uint32x4_t libx4 = vmovl_u16(vget_low_u16(ibx8));
+    uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8));
+    uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8));

-      uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8));
-      uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8));
-      uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8));
-      uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8));
-      uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8));
-      uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8));
+    uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8));
+    uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8));
+    uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8));
+    uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8));
+    uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8));
+    uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8));

-      float32x4_t fhibx4 = vcvtq_f32_u32(hibx4);
-      float32x4_t fhigx4 = vcvtq_f32_u32(higx4);
-      float32x4_t fhirx4 = vcvtq_f32_u32(hirx4);
-      float32x4_t flibx4 = vcvtq_f32_u32(libx4);
-      float32x4_t fligx4 = vcvtq_f32_u32(ligx4);
-      float32x4_t flirx4 = vcvtq_f32_u32(lirx4);
+    float32x4_t fhibx4 = vcvtq_f32_u32(hibx4);
+    float32x4_t fhigx4 = vcvtq_f32_u32(higx4);
+    float32x4_t fhirx4 = vcvtq_f32_u32(hirx4);
+    float32x4_t flibx4 = vcvtq_f32_u32(libx4);
+    float32x4_t fligx4 = vcvtq_f32_u32(ligx4);
+    float32x4_t flirx4 = vcvtq_f32_u32(lirx4);

-      float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4);
-      float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4);
-      float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4);
-      float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4);
-      float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4);
-      float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4);
+    float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4);
+    float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4);
+    float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4);
+    float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4);
+    float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4);
+    float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4);

-      // alpha load from little end
-      float32x4_t lalpx4 = vld1q_f32(alpha_data + i); // low bits
-      float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4); // high bits
-      float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4);
-      float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4);
+    // alpha load from little end
+    float32x4_t lalpx4 = vld1q_f32(alpha_data + i);      // low bits
+    float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4);  // high bits
+    float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4);
+    float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4);

-      // blending 
-      float32x4_t fhvbx4 = vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4));
-      float32x4_t fhvgx4 = vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4));
-      float32x4_t fhvrx4 = vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4));
-      float32x4_t flvbx4 = vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4));
-      float32x4_t flvgx4 = vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4));
-      float32x4_t flvrx4 = vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4));
+    // blending
+    float32x4_t fhvbx4 =
+        vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4));
+    float32x4_t fhvgx4 =
+        vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4));
+    float32x4_t fhvrx4 =
+        vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4));
+    float32x4_t flvbx4 =
+        vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4));
+    float32x4_t flvgx4 =
+        vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4));
+    float32x4_t flvrx4 =
+        vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4));

-      // f32 -> u32 -> u16 -> u8
-      uint8x8x3_t vbgrx8x3;
-      // combine low 64 bits and high 64 bits into one 128 neon register
-      vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)), 
-                                               vmovn_u32(vcvtq_u32_f32(fhvbx4))));
-      vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)), 
-                                               vmovn_u32(vcvtq_u32_f32(fhvgx4))));
-      vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)), 
-                                               vmovn_u32(vcvtq_u32_f32(fhvrx4))));                                         
-      vst3_u8(vis_data + i * 3, vbgrx8x3);
-   }
+    // f32 -> u32 -> u16 -> u8
+    uint8x8x3_t vbgrx8x3;
+    // combine low 64 bits and high 64 bits into one 128 neon register
+    vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)),
+                                             vmovn_u32(vcvtq_u32_f32(fhvbx4))));
+    vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)),
+                                             vmovn_u32(vcvtq_u32_f32(fhvgx4))));
+    vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)),
+                                             vmovn_u32(vcvtq_u32_f32(fhvrx4))));
+    vst3_u8(vis_data + i * 3, vbgrx8x3);
+  }

-   for (int i = size - 7; i < size; i++) {
-      float alp = alpha_data[i];
-      for (int c = 0; c < 3; ++c) {
-         vis_data[i * 3 + 0] = cv::saturate_cast<uchar>(
-            static_cast<float>(im_data[i * 3 + c]) * alp  + (1.0f - alp) 
-            * static_cast<float>(background_data[i * 3 + c]));
-      }
-   }
+  for (int i = size - (size % 8); i < size; i++) {  // tail the 8-wide loop skipped
+    float alp = alpha_data[i];
+    for (int c = 0; c < 3; ++c) {
+      vis_data[i * 3 + c] = cv::saturate_cast<uchar>(
+          static_cast<float>(im_data[i * 3 + c]) * alp +
+          (1.0f - alp) * static_cast<float>(background_data[i * 3 + c]));
+    }
+  }

-   return vis_img;
+  return vis_img;
 #endif
 }

-cv::Mat SwapBackgroundNEON(const cv::Mat& im,
-                           const cv::Mat& background,
+cv::Mat SwapBackgroundNEON(const cv::Mat& im, const cv::Mat& background,
                           const SegmentationResult& result,
                           int background_label) {
 #ifndef __ARM_NEON
-   FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
 #else
-   FDASSERT((!im.empty()), "Image can't be empty!");
-   FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
-   FDASSERT((!background.empty()), "Background image can't be empty!");
-   FDASSERT((background.channels() == 3),
-            "Only support 3 channels background image mat!");
-   int out_h = static_cast<int>(result.shape[0]);
-   int out_w = static_cast<int>(result.shape[1]);
-   int height = im.rows;
-   int width = im.cols;
-   int bg_height = background.rows;
-   int bg_width = background.cols;
-   auto vis_img = cv::Mat(height, width, CV_8UC3);  
+  FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((background.channels() == 3),
+           "Only support 3 channels background image mat!");
+  int out_h = static_cast<int>(result.shape[0]);
+  int out_w = static_cast<int>(result.shape[1]);
+  int height = im.rows;
+  int width = im.cols;
+  int bg_height = background.rows;
+  int bg_width = background.cols;
+  auto vis_img = cv::Mat(height, width, CV_8UC3);

-   cv::Mat background_ref;
-   if ((bg_height != height) || (bg_width != width)) {
-      cv::resize(background, background_ref, cv::Size(width, height));
-   } else {
-      background_ref = background; // ref only
-   }
-   if ((background_ref).type() != CV_8UC3) {
-      (background_ref).convertTo((background_ref), CV_8UC3);
-   }
+  cv::Mat background_ref;
+  if ((bg_height != height) || (bg_width != width)) {
+    cv::resize(background, background_ref, cv::Size(width, height));
+  } else {
+    background_ref = background;  // ref only
+  }
+  if ((background_ref).type() != CV_8UC3) {
+    (background_ref).convertTo((background_ref), CV_8UC3);
+  }

-   uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
-   const uint8_t* background_data = static_cast<const uint8_t*>(background_ref.data);
-   const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
-   const uint8_t *label_data = static_cast<const uint8_t*>(result.label_map.data());
+  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
+  const uint8_t* background_data =
+      static_cast<const uint8_t*>(background_ref.data);
+  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
+  const uint8_t* label_data =
+      static_cast<const uint8_t*>(result.label_map.data());

-   const uint8_t background_label_ = static_cast<uint8_t>(background_label);
-   const int32_t size = static_cast<int32_t>(height * width);
+  const uint8_t background_label_ = static_cast<uint8_t>(background_label);
+  const int32_t size = static_cast<int32_t>(height * width);

-   uint8x16_t backgroundx16 = vdupq_n_u8(background_label_);
-   #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
-   for (int i = 0; i < size - 15; i += 16) {
-      uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3); // 48 bytes
-      uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3);
-      uint8x16_t labelx16 = vld1q_u8(label_data + i); // 16 bytes
-      // Set mask bit = 1 if label != background_label
-      uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16);
-      uint8x16_t keepx16 = vmvnq_u8(nkeepx16); // keep_value = 1
-      uint8x16x3_t vbgr16x3;
-      vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16), 
-                                 vandq_u8(bbgr16x3.val[0], nkeepx16));
-      vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16), 
-                                 vandq_u8(bbgr16x3.val[1], nkeepx16));
-      vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16), 
-                                 vandq_u8(bbgr16x3.val[2], nkeepx16));
-      // Store the blended pixels to vis img
-      vst3q_u8(vis_data + i * 3, vbgr16x3);
-   }
+  uint8x16_t backgroundx16 = vdupq_n_u8(background_label_);
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+  for (int i = 0; i < size - 15; i += 16) {
+    uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3);  // 48 bytes
+    uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3);
+    uint8x16_t labelx16 = vld1q_u8(label_data + i);  // 16 bytes
+    // Set mask bit = 1 if label != background_label
+    uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16);
+    uint8x16_t keepx16 = vmvnq_u8(nkeepx16);  // keep_value = 1
+    uint8x16x3_t vbgr16x3;
+    vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16),
+                               vandq_u8(bbgr16x3.val[0], nkeepx16));
+    vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16),
+                               vandq_u8(bbgr16x3.val[1], nkeepx16));
+    vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16),
+                               vandq_u8(bbgr16x3.val[2], nkeepx16));
+    // Store the blended pixels to vis img
+    vst3q_u8(vis_data + i * 3, vbgr16x3);
+  }

-   for (int i = size - 15; i < size; i++) {
-      uint8_t label = label_data[i];
-      if (label != background_label_) {
-         vis_data[i * 3 + 0] = im_data[i * 3 + 0];
-         vis_data[i * 3 + 1] = im_data[i * 3 + 1];
-         vis_data[i * 3 + 2] = im_data[i * 3 + 2];
-      } else {
-         vis_data[i * 3 + 0] = background_data[i * 3 + 0];
-         vis_data[i * 3 + 1] = background_data[i * 3 + 1];
-         vis_data[i * 3 + 2] = background_data[i * 3 + 2];
-      }
-   }
+  for (int i = size - (size % 16); i < size; i++) {  // tail the 16-wide loop skipped
+    uint8_t label = label_data[i];
+    if (label != background_label_) {
+      vis_data[i * 3 + 0] = im_data[i * 3 + 0];
+      vis_data[i * 3 + 1] = im_data[i * 3 + 1];
+      vis_data[i * 3 + 2] = im_data[i * 3 + 2];
+    } else {
+      vis_data[i * 3 + 0] = background_data[i * 3 + 0];
+      vis_data[i * 3 + 1] = background_data[i * 3 + 1];
+      vis_data[i * 3 + 2] = background_data[i * 3 + 2];
+    }
+  }

-   return vis_img;
+  return vis_img;
 #endif
 }

 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
--- a/fastdeploy/vision/visualize/swap_background_arm.h
+++ b/fastdeploy/vision/visualize/swap_background_arm.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
 #pragma once

 #include "fastdeploy/vision/common/result.h"
@@ -33,6 +32,3 @@ cv::Mat SwapBackgroundNEON(const cv::Mat& im,

 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
-
--- a/fastdeploy/vision/visualize/visualize.cc
+++ b/fastdeploy/vision/visualize/visualize.cc
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"

 namespace fastdeploy {
@@ -66,4 +65,3 @@ const std::vector<int>& Visualize::GetColorMap(int num_classes) {

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifdef ENABLE_VISION_VISUALIZE
 #pragma once

 #include "fastdeploy/vision/common/result.h"
@@ -202,4 +201,3 @@ FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,

 }  // namespace vision
 }  // namespace fastdeploy
-#endif
--- a/scripts/android/build_android_cpp.sh
+++ b/scripts/android/build_android_cpp.sh
@@ -88,11 +88,10 @@ __build_fastdeploy_android_shared() {
        -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
        -DENABLE_ORT_BACKEND=OFF \
        -DENABLE_LITE_BACKEND=ON \
-        -DENABLE_PADDLE_FRONTEND=OFF \
+        -DENABLE_PADDLE2ONNX=OFF \
        -DENABLE_FLYCV=ON \
        -DENABLE_TEXT=OFF \
        -DENABLE_VISION=ON \
-        -DENABLE_VISION_VISUALIZE=ON \
        -DBUILD_EXAMPLES=ON \
        -DWITH_OPENCV_STATIC=OFF \
        -DWITH_LITE_STATIC=OFF \
--- a/scripts/android/build_android_cpp_with_text_api.sh
+++ b/scripts/android/build_android_cpp_with_text_api.sh
@@ -88,11 +88,10 @@ __build_fastdeploy_android_shared() {
        -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
        -DENABLE_ORT_BACKEND=OFF \
        -DENABLE_LITE_BACKEND=ON \
-        -DENABLE_PADDLE_FRONTEND=OFF \
+        -DENABLE_PADDLE2ONNX=OFF \
        -DENABLE_FLYCV=ON \
        -DENABLE_TEXT=ON \
        -DENABLE_VISION=ON \
-        -DENABLE_VISION_VISUALIZE=ON \
        -DBUILD_EXAMPLES=ON \
        -DWITH_OPENCV_STATIC=OFF \
        -DWITH_LITE_STATIC=OFF \
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -88,7 +88,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
            python setup.py bdist_wheel;
            cd /workspace/fastdeploy;
            rm -rf build; mkdir -p build;cd build;
-            cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+            cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
            make -j`nproc`;
            make install;
            cd /workspace/fastdeploy/serving;
@@ -121,7 +121,7 @@ docker run -i --rm --name ${docker_name} \
            python setup.py bdist_wheel;
            cd /workspace/fastdeploy;
            rm -rf build; mkdir build; cd build;
-            cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+            cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
            make -j`nproc`;
            make install;
            cd /workspace/fastdeploy/serving;