[Other] Remove useless macros (#1095)

* Remove useless macros * trigger ci * fix check error * rename INTEGRATE_PADDLE2ONNX to ENABLE_PADDLE2ONNX
2025-10-05 16:48:03 +08:00 · 2023-01-09 21:35:23 +08:00
parent 48bc7241cb
commit df20b2a02b
35 changed files with 510 additions and 559 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,25 +103,8 @@ if(NOT GIT_URL)
    set(GIT_URL "https://github.com")
 endif()
-# Check for 32bit system
+# check build options
-if(WIN32)
+include(${PROJECT_SOURCE_DIR}/cmake/check.cmake)
  if(NOT CMAKE_CL_64)
    message("***********************Compile on non 64-bit system now**********************")
    add_definitions(-DNON_64_PLATFORM)
    if(WITH_GPU)
      message(FATAL_ERROR "-DWITH_GPU=ON doesn't support on non 64-bit system now.")
    endif()
    if(ENABLE_PADDLE_BACKEND)
      message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.")
    endif()
    if(ENABLE_POROS_BACKEND)
    message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.")
    endif()
    if(ENABLE_VISION)
      message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.")
    endif()
  endif()
 endif()
 if(WIN32 AND ENABLE_VISION)
  add_definitions(-DYAML_CPP_DLL)
@@ -149,30 +132,15 @@ if (WITH_ASCEND)
 endif()
 if (WITH_KUNLUNXIN)
-  if(NOT ENABLE_LITE_BACKEND)
+  include(${PROJECT_SOURCE_DIR}/cmake/kunlunxin.cmake)
      set(ENABLE_LITE_BACKEND ON)
  endif()
  if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
    message(FATAL_ERROR "KunlunXin XPU is only supported on Linux x64 platform")
  endif()
  if(NOT PADDLELITE_URL)
    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
  endif()
 endif()
-if(ANDROID OR IOS)
+if(WITH_IPU)
-  if(ENABLE_ORT_BACKEND)
+  if(NOT ENABLE_PADDLE_BACKEND)
-    message(FATAL_ERROR "Not support ONNXRuntime backend for Andorid/IOS now. Please set ENABLE_ORT_BACKEND=OFF.")
+    message("Will force to set ENABLE_PADDLE_BACKEND when build with GraphCore IPU.")
-  endif()
+    set(ENABLE_PADDLE_BACKEND ON)
  if(ENABLE_PADDLE_BACKEND)
    message(FATAL_ERROR "Not support Paddle backend for Andorid/IOS now. Please set ENABLE_PADDLE_BACKEND=OFF.")
  endif()
  if(ENABLE_OPENVINO_BACKEND)
    message(FATAL_ERROR "Not support OpenVINO backend for Andorid/IOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.")
  endif()
  if(ENABLE_TRT_BACKEND)
    message(FATAL_ERROR "Not support TensorRT backend for Andorid/IOS now. Please set ENABLE_TRT_BACKEND=OFF.")
  endif()
  add_definitions(-DWITH_IPU)
 endif()
 # Check for macOS architecture
@@ -208,7 +176,6 @@ set(DEPEND_LIBS "")
 file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION)
 string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION)
 # Add eigen lib
 include_directories(${PROJECT_SOURCE_DIR}/third_party/eigen)
 if(WIN32)
@@ -221,7 +188,7 @@ if(WITH_SW)
 endif()
 if(ENABLE_ORT_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  add_definitions(-DENABLE_ORT_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS})
  include(${PROJECT_SOURCE_DIR}/cmake/onnxruntime.cmake)
@@ -236,7 +203,7 @@ if(ENABLE_LITE_BACKEND)
 endif()
 if(ENABLE_PADDLE_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  add_definitions(-DENABLE_PADDLE_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS})
  include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake)
@@ -250,7 +217,7 @@ if(ENABLE_PADDLE_BACKEND)
 endif()
 if(ENABLE_OPENVINO_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  add_definitions(-DENABLE_OPENVINO_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS})
  include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
@@ -313,36 +280,24 @@ if(ENABLE_POROS_BACKEND)
 endif()
 if(WITH_GPU)
-  if(APPLE)
+  add_definitions(-DWITH_GPU)
-    message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
+  include_directories(${CUDA_DIRECTORY}/include)
-    set(WITH_GPU OFF)
+  if(WIN32)
-  elseif(ANDROID OR IOS)
+    find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64)
    message(FATAL_ERROR "Cannot enable GPU while compling in Android or IOS.")
    set(WITH_GPU OFF)
  else()
-    add_definitions(-DWITH_GPU)
+    find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
    include_directories(${CUDA_DIRECTORY}/include)
    if(WIN32)
      find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64)
    else()
      find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
    endif()
    list(APPEND DEPEND_LIBS ${CUDA_LIB})
    # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc.
    enable_language(CUDA)
    message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: "
                    "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}")
    include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake)
  endif()
-endif()
+  list(APPEND DEPEND_LIBS ${CUDA_LIB})
-if(WITH_IPU)
+  # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc.
-  add_definitions(-DWITH_IPU)
+  enable_language(CUDA)
  message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: "
                  "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}")
  include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake)
 endif()
 if(ENABLE_TRT_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
  if(APPLE OR ANDROID OR IOS)
    message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios/android os, please set -DENABLE_TRT_BACKEND=OFF.")
  endif()
@@ -406,7 +361,6 @@ endif()
 if(ENABLE_VISION)
  add_definitions(-DENABLE_VISION)
  add_definitions(-DENABLE_VISION_VISUALIZE)
  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
  list(APPEND DEPEND_LIBS yaml-cpp)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS})
@@ -435,11 +389,11 @@ if(ENABLE_ENCRYPTION)
  list(APPEND DEPEND_LIBS ${OPENSSL_LIBRARIES})
 endif()
-if(ENABLE_PADDLE_FRONTEND)
+if(ENABLE_PADDLE2ONNX)
-  add_definitions(-DENABLE_PADDLE_FRONTEND)
+  add_definitions(-DENABLE_PADDLE2ONNX)
  include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake)
  list(APPEND DEPEND_LIBS external_paddle2onnx)
-endif(ENABLE_PADDLE_FRONTEND)
+endif(ENABLE_PADDLE2ONNX)
 configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -10,7 +10,7 @@ set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
 set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
 set(POROS_VERSION @POROS_VERSION@)
 set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
-set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
+set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@)
 set(ENABLE_VISION @ENABLE_VISION@)
 set(ENABLE_FLYCV @ENABLE_FLYCV@)
 set(ENABLE_TEXT @ENABLE_TEXT@)
@@ -238,7 +238,7 @@ if (ENABLE_TEXT)
  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include)
 endif()
-if(ENABLE_PADDLE_FRONTEND)
+if(ENABLE_PADDLE2ONNX)
  if(ANDROID)
    message(FATAL_ERROR "Not support fastdeploy-paddle2onnx APIs with Android now!")
  endif()
--- a/cmake/check.cmake
+++ b/cmake/check.cmake
@@ -0,0 +1,44 @@
 # Build-option sanity checks. Each section aborts configuration with
 # message(FATAL_ERROR ...) when an option is requested on a platform that the
 # checks below reject.
 #
 # Check for 32bit system
 # On Windows without a 64-bit cl compiler, define NON_64_PLATFORM and reject
 # the options listed below.
 if(WIN32)
  if(NOT CMAKE_CL_64)
    message("***********************Compile on non 64-bit system now**********************")
    add_definitions(-DNON_64_PLATFORM)
    if(WITH_GPU)
      message(FATAL_ERROR "-DWITH_GPU=ON doesn't support on non 64-bit system now.")
    endif()
    if(ENABLE_PADDLE_BACKEND)
      message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.")
    endif()
    if(ENABLE_POROS_BACKEND)
      message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.")
    endif()
    if(ENABLE_VISION)
      message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.")
    endif()
  endif()
 endif()
 # Check for mobile targets
 # The ONNXRuntime, Paddle, OpenVINO and TensorRT backends are rejected when
 # building for Android or iOS.
 if(ANDROID OR IOS)
  if(ENABLE_ORT_BACKEND)
    message(FATAL_ERROR "Not support ONNXRuntime backend for Android/IOS now. Please set ENABLE_ORT_BACKEND=OFF.")
  endif()
  if(ENABLE_PADDLE_BACKEND)
    message(FATAL_ERROR "Not support Paddle backend for Android/IOS now. Please set ENABLE_PADDLE_BACKEND=OFF.")
  endif()
  if(ENABLE_OPENVINO_BACKEND)
    message(FATAL_ERROR "Not support OpenVINO backend for Android/IOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.")
  endif()
  if(ENABLE_TRT_BACKEND)
    message(FATAL_ERROR "Not support TensorRT backend for Android/IOS now. Please set ENABLE_TRT_BACKEND=OFF.")
  endif()
 endif()
 # Check for GPU support
 # WITH_GPU is rejected on Apple platforms and on Android/iOS.
 # message(FATAL_ERROR) stops processing immediately, so nothing placed after
 # it in a branch would ever run; WITH_GPU is therefore not reset here.
 if(WITH_GPU)
  if(APPLE)
    message(FATAL_ERROR "Cannot enable GPU while compiling in Mac OSX.")
  elseif(ANDROID OR IOS)
    message(FATAL_ERROR "Cannot enable GPU while compiling in Android or IOS.")
  endif()
 endif()
--- a/cmake/kunlunxin.cmake
+++ b/cmake/kunlunxin.cmake
@@ -0,0 +1,12 @@
 # Configuration applied when building with KunlunXin support.
 #
 # Force ENABLE_LITE_BACKEND on (with a notice) when it is not already enabled.
 if(NOT ENABLE_LITE_BACKEND)
  message("Will force to set ENABLE_LITE_BACKEND when build with KunlunXin.")
  set(ENABLE_LITE_BACKEND ON)
 endif()
 # Abort unless the *host* processor name matches x86_64. Only the architecture
 # is tested here; the operating system named in the message is not checked.
 if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
  message(FATAL_ERROR "KunlunXin XPU is only supported on Linux x64 platform")
 endif()
 # Provide a default PADDLELITE_URL only when the caller has not set one.
 if(NOT PADDLELITE_URL)
  set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
 endif()
--- a/cmake/paddle_inference.cmake
+++ b/cmake/paddle_inference.cmake
@@ -13,6 +13,10 @@
 # limitations under the License.
 include(ExternalProject)
 if(WITH_GPU AND WITH_IPU)
  message(FATAL_ERROR "Cannot build with WITH_GPU=ON and WITH_IPU=ON on the same time.")
 endif()
 option(PADDLEINFERENCE_DIRECTORY "Directory of Paddle Inference library" OFF)
 set(PADDLEINFERENCE_PROJECT "extern_paddle_inference")
--- a/cmake/timvx.cmake
+++ b/cmake/timvx.cmake
@@ -3,9 +3,9 @@ if(NOT ${ENABLE_LITE_BACKEND})
    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_LITE_BACKEND=ON")
    set(ENABLE_LITE_BACKEND ON)
 endif()
-if(${ENABLE_PADDLE_FRONTEND})
+if(${ENABLE_PADDLE2ONNX})
-    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE_FRONTEND=OFF")
+    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE2ONNX=OFF")
-    set(ENABLE_PADDLE_FRONTEND OFF)
+    set(ENABLE_PADDLE2ONNX OFF)
 endif()
 if(${ENABLE_ORT_BACKEND})
    message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_ORT_BACKEND=OFF")
--- a/docs/api_docs/cpp/Doxyfile
+++ b/docs/api_docs/cpp/Doxyfile
@@ -2100,7 +2100,7 @@ INCLUDE_FILE_PATTERNS  =
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-PREDEFINED = protected=private ENABLE_VISION_VISUALIZE=1
+PREDEFINED = protected=private
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
--- a/fastdeploy/core/config.h.in
+++ b/fastdeploy/core/config.h.in
@@ -17,8 +17,8 @@
 #cmakedefine FASTDEPLOY_LIB
 #endif
-#ifndef ENABLE_PADDLE_FRONTEND
+#ifndef ENABLE_PADDLE2ONNX
-#cmakedefine ENABLE_PADDLE_FRONTEND
+#cmakedefine ENABLE_PADDLE2ONNX
 #endif
 #ifndef ENABLE_ORT_BACKEND
@@ -56,13 +56,3 @@
 #ifndef ENABLE_TEXT
 #cmakedefine ENABLE_TEXT
 #endif
 #ifdef ENABLE_VISION
 #ifndef ENABLE_VISION_VISUALIZE
 #define ENABLE_VISION_VISUALIZE
 #endif
 #endif
 #ifndef ENABLE_FDTENSOR_FUNC
 #cmakedefine ENABLE_FDTENSOR_FUNC
 #endif
--- a/fastdeploy/runtime/backends/openvino/ov_backend.cc
+++ b/fastdeploy/runtime/backends/openvino/ov_backend.cc
@@ -13,7 +13,7 @@
 // limitations under the License.
 #include "fastdeploy/runtime/backends/openvino/ov_backend.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif
--- a/fastdeploy/runtime/backends/ort/ort_backend.cc
+++ b/fastdeploy/runtime/backends/ort/ort_backend.cc
@@ -21,7 +21,7 @@
 #include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
 #include "fastdeploy/runtime/backends/ort/utils.h"
 #include "fastdeploy/utils/utils.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif
@@ -84,7 +84,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
  char* model_content_ptr;
  int model_content_size = 0;
  bool save_external = false;
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
  std::vector<paddle2onnx::CustomOp> ops;
  ops.resize(2);
  strcpy(ops[0].op_name, "multiclass_nms3");
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.h
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h
@@ -21,7 +21,7 @@
 #include "fastdeploy/runtime/backends/backend.h"
 #include "fastdeploy/runtime/backends/paddle/option.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif
 #include "fastdeploy/utils/unique_ptr.h"
--- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
+++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
@@ -20,7 +20,7 @@
 #include "NvInferRuntime.h"
 #include "fastdeploy/function/cuda_cast.h"
 #include "fastdeploy/utils/utils.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif
@@ -123,7 +123,7 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
  }
  option_ = option;
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
  std::vector<paddle2onnx::CustomOp> ops;
  ops.resize(1);
  strcpy(ops[0].op_name, "pool2d");
--- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc
+++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc
@@ -1,7 +1,8 @@
 #include "fastdeploy/vision/keypointdet/pptinypose/pptinypose.h"
 #include "fastdeploy/vision/utils/utils.h"
 #include "yaml-cpp/yaml.h"
-#ifdef ENABLE_PADDLE_FRONTEND
+#ifdef ENABLE_PADDLE2ONNX
 #include "paddle2onnx/converter.h"
 #endif
 #include "fastdeploy/vision.h"
@@ -16,7 +17,8 @@ PPTinyPose::PPTinyPose(const std::string& model_file,
                       const RuntimeOption& custom_option,
                       const ModelFormat& model_format) {
  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
+  valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
                        Backend::LITE};
  valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
  valid_kunlunxin_backends = {Backend::LITE};
  runtime_option = custom_option;
@@ -100,11 +102,11 @@ bool PPTinyPose::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
      int resize_height = -1;
      std::tie(resize_width, resize_height) = processor->GetWidthAndHeight();
      cv::Mat trans_matrix(2, 3, CV_64FC1);
-      GetAffineTransform(center, scale, 0, {resize_width, resize_height}, &trans_matrix, 0);
+      GetAffineTransform(center, scale, 0, {resize_width, resize_height},
                         &trans_matrix, 0);
      if (!(processor->SetTransformMatrix(trans_matrix))) {
-        FDERROR << "Failed to set transform matrix of " 
+        FDERROR << "Failed to set transform matrix of "
-                << processors_[i]->Name()
+                << processors_[i]->Name() << " processor." << std::endl;
                << " processor." << std::endl;
      }
    }
    if (!(*(processors_[i].get()))(mat)) {
@@ -139,7 +141,7 @@ bool PPTinyPose::Postprocess(std::vector<FDTensor>& infer_result,
  int idxdata_size =
      std::accumulate(infer_result[1].shape.begin(),
                      infer_result[1].shape.end(), 1, std::multiplies<int>());
-  
+
  if (outdata_size < 6) {
    FDWARNING << "PPTinyPose No object detected." << std::endl;
  }
@@ -160,7 +162,9 @@ bool PPTinyPose::Postprocess(std::vector<FDTensor>& infer_result,
    std::copy(static_cast<int64_t*>(idx_data),
              static_cast<int64_t*>(idx_data) + idxdata_size, idxout.begin());
  } else {
-    FDERROR << "Only support process inference result with INT32/INT64 data type, but now it's " << idx_dtype << "." << std::endl;
+    FDERROR << "Only support process inference result with INT32/INT64 data "
               "type, but now it's "
            << idx_dtype << "." << std::endl;
  }
  GetFinalPredictions(heatmap, out_data_shape, idxout, center, scale, &preds,
                      this->use_dark);
@@ -176,7 +180,8 @@ bool PPTinyPose::Postprocess(std::vector<FDTensor>& infer_result,
 bool PPTinyPose::Predict(cv::Mat* im, KeyPointDetectionResult* result) {
  std::vector<float> center = {round(im->cols / 2.0f), round(im->rows / 2.0f)};
-  std::vector<float> scale = {static_cast<float>(im->cols), static_cast<float>(im->rows)};
+  std::vector<float> scale = {static_cast<float>(im->cols),
                              static_cast<float>(im->rows)};
  Mat mat(*im);
  std::vector<FDTensor> processed_data;
  if (!Preprocess(&mat, &processed_data)) {
--- a/fastdeploy/vision/vision_pybind.cc
+++ b/fastdeploy/vision/vision_pybind.cc
@@ -29,9 +29,7 @@ void BindKeyPointDetection(pybind11::module& m);
 void BindHeadPose(pybind11::module& m);
 void BindSR(pybind11::module& m);
 void BindGeneration(pybind11::module& m);
 #ifdef ENABLE_VISION_VISUALIZE
 void BindVisualize(pybind11::module& m);
 #endif
 void BindVision(pybind11::module& m) {
  pybind11::class_<vision::Mask>(m, "Mask")
@@ -39,20 +37,20 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("data", &vision::Mask::data)
      .def_readwrite("shape", &vision::Mask::shape)
      .def(pybind11::pickle(
-        [](const vision::Mask &m) { 
+          [](const vision::Mask& m) {
            return pybind11::make_tuple(m.data, m.shape);
-        },
+          },
-        [](pybind11::tuple t) { 
+          [](pybind11::tuple t) {
            if (t.size() != 2)
-                throw std::runtime_error("vision::Mask pickle with invalid state!");
+              throw std::runtime_error(
                  "vision::Mask pickle with invalid state!");
            vision::Mask m;
            m.data = t[0].cast<std::vector<int32_t>>();
            m.shape = t[1].cast<std::vector<int64_t>>();
            return m;
-        }
+          }))
      ))
      .def("__repr__", &vision::Mask::Str)
      .def("__str__", &vision::Mask::Str);
@@ -61,20 +59,20 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("label_ids", &vision::ClassifyResult::label_ids)
      .def_readwrite("scores", &vision::ClassifyResult::scores)
      .def(pybind11::pickle(
-        [](const vision::ClassifyResult &c) { 
+          [](const vision::ClassifyResult& c) {
            return pybind11::make_tuple(c.label_ids, c.scores);
-        },
+          },
-        [](pybind11::tuple t) { 
+          [](pybind11::tuple t) {
            if (t.size() != 2)
-                throw std::runtime_error("vision::ClassifyResult pickle with invalid state!");
+              throw std::runtime_error(
                  "vision::ClassifyResult pickle with invalid state!");
            vision::ClassifyResult c;
            c.label_ids = t[0].cast<std::vector<int32_t>>();
            c.scores = t[1].cast<std::vector<float>>();
            return c;
-        }
+          }))
      ))
      .def("__repr__", &vision::ClassifyResult::Str)
      .def("__str__", &vision::ClassifyResult::Str);
@@ -86,12 +84,14 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("masks", &vision::DetectionResult::masks)
      .def_readwrite("contain_masks", &vision::DetectionResult::contain_masks)
      .def(pybind11::pickle(
-        [](const vision::DetectionResult &d) { 
+          [](const vision::DetectionResult& d) {
-            return pybind11::make_tuple(d.boxes, d.scores, d.label_ids, d.masks, d.contain_masks);
+            return pybind11::make_tuple(d.boxes, d.scores, d.label_ids, d.masks,
-        },
+                                        d.contain_masks);
-        [](pybind11::tuple t) { 
+          },
          [](pybind11::tuple t) {
            if (t.size() != 5)
-                throw std::runtime_error("vision::DetectionResult pickle with Invalid state!");
+              throw std::runtime_error(
                  "vision::DetectionResult pickle with Invalid state!");
            vision::DetectionResult d;
            d.boxes = t[0].cast<std::vector<std::array<float, 4>>>();
@@ -101,8 +101,7 @@ void BindVision(pybind11::module& m) {
            d.contain_masks = t[4].cast<bool>();
            return d;
-        }
+          }))
      ))
      .def("__repr__", &vision::DetectionResult::Str)
      .def("__str__", &vision::DetectionResult::Str);
@@ -152,14 +151,17 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("label_map", &vision::SegmentationResult::label_map)
      .def_readwrite("score_map", &vision::SegmentationResult::score_map)
      .def_readwrite("shape", &vision::SegmentationResult::shape)
-      .def_readwrite("contain_score_map", &vision::SegmentationResult::contain_score_map)
+      .def_readwrite("contain_score_map",
                     &vision::SegmentationResult::contain_score_map)
      .def(pybind11::pickle(
-        [](const vision::SegmentationResult &s) { 
+          [](const vision::SegmentationResult& s) {
-            return pybind11::make_tuple(s.label_map, s.score_map, s.shape, s.contain_score_map);
+            return pybind11::make_tuple(s.label_map, s.score_map, s.shape,
-        },
+                                        s.contain_score_map);
-        [](pybind11::tuple t) { 
+          },
          [](pybind11::tuple t) {
            if (t.size() != 4)
-                throw std::runtime_error("vision::SegmentationResult pickle with Invalid state!");
+              throw std::runtime_error(
                  "vision::SegmentationResult pickle with Invalid state!");
            vision::SegmentationResult s;
            s.label_map = t[0].cast<std::vector<uint8_t>>();
@@ -168,8 +170,7 @@ void BindVision(pybind11::module& m) {
            s.contain_score_map = t[3].cast<bool>();
            return s;
-        }
+          }))
      ))
      .def("__repr__", &vision::SegmentationResult::Str)
      .def("__str__", &vision::SegmentationResult::Str);
@@ -178,7 +179,8 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("alpha", &vision::MattingResult::alpha)
      .def_readwrite("foreground", &vision::MattingResult::foreground)
      .def_readwrite("shape", &vision::MattingResult::shape)
-      .def_readwrite("contain_foreground", &vision::MattingResult::contain_foreground)
+      .def_readwrite("contain_foreground",
                     &vision::MattingResult::contain_foreground)
      .def("__repr__", &vision::MattingResult::Str)
      .def("__str__", &vision::MattingResult::Str);
@@ -215,8 +217,6 @@ void BindVision(pybind11::module& m) {
  BindHeadPose(m);
  BindSR(m);
  BindGeneration(m);
 #ifdef ENABLE_VISION_VISUALIZE
  BindVisualize(m);
 #endif
 }
 }  // namespace fastdeploy
--- a/fastdeploy/vision/visualize/classification.cc
+++ b/fastdeploy/vision/visualize/classification.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include <algorithm>
 #include "fastdeploy/vision/visualize/visualize.h"
@@ -46,7 +44,7 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
    cv::Point origin;
    origin.x = w_sep;
    origin.y = h_sep * (i + 1);
-    cv::putText(vis_im, text, origin, font, font_size, 
+    cv::putText(vis_im, text, origin, font, font_size,
                cv::Scalar(255, 255, 255), 1);
  }
  return vis_im;
@@ -54,9 +52,8 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
 // Visualize ClassifyResult with custom labels.
 cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
-                          const std::vector<std::string>& labels,
+                          const std::vector<std::string>& labels, int top_k,
-                          int top_k, float score_threshold,
+                          float score_threshold, float font_size) {
                          float font_size) {
  int h = im.rows;
  int w = im.cols;
  auto vis_im = im.clone();
@@ -78,8 +75,8 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
    if (labels.size() > result.label_ids[i]) {
      text = labels[result.label_ids[i]] + "," + text;
    } else {
-      FDWARNING << "The label_id: " << result.label_ids[i] 
+      FDWARNING << "The label_id: " << result.label_ids[i]
-                << " in DetectionResult should be less than length of labels:" 
+                << " in DetectionResult should be less than length of labels:"
                << labels.size() << "." << std::endl;
    }
    if (text.size() > 16) {
@@ -89,7 +86,7 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
    cv::Point origin;
    origin.x = w_sep;
    origin.y = h_sep * (i + 1);
-    cv::putText(vis_im, text, origin, font, font_size, 
+    cv::putText(vis_im, text, origin, font, font_size,
                cv::Scalar(255, 255, 255), 1);
  }
  return vis_im;
@@ -97,4 +94,3 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/detection.cc
+++ b/fastdeploy/vision/visualize/detection.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include <algorithm>
 #include "fastdeploy/vision/visualize/visualize.h"
@@ -105,10 +103,9 @@ cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
 }
 // Visualize DetectionResult with custom labels.
-cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result, 
+cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
                     const std::vector<std::string>& labels,
-                     float score_threshold, int line_size,
+                     float score_threshold, int line_size, float font_size) {
                     float font_size) {
  if (result.contain_masks) {
    FDASSERT(result.boxes.size() == result.masks.size(),
             "The size of masks must be equal to the size of boxes, but now "
@@ -145,8 +142,8 @@ cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
    if (labels.size() > result.label_ids[i]) {
      text = labels[result.label_ids[i]] + "," + text;
    } else {
-      FDWARNING << "The label_id: " << result.label_ids[i] 
+      FDWARNING << "The label_id: " << result.label_ids[i]
-                << " in DetectionResult should be less than length of labels:" 
+                << " in DetectionResult should be less than length of labels:"
                << labels.size() << "." << std::endl;
    }
    if (text.size() > 16) {
@@ -287,4 +284,3 @@ cv::Mat Visualize::VisDetection(const cv::Mat& im,
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/face_alignment.cc
+++ b/fastdeploy/vision/visualize/face_alignment.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -28,16 +26,12 @@ cv::Mat VisFaceAlignment(const cv::Mat& im, const FaceAlignmentResult& result,
  cv::Scalar landmark_color = cv::Scalar(0, 255, 0);
  for (size_t i = 0; i < result.landmarks.size(); ++i) {
    cv::Point landmark;
-    landmark.x = static_cast<int>(
+    landmark.x = static_cast<int>(result.landmarks[i][0]);
-        result.landmarks[i][0]);
+    landmark.y = static_cast<int>(result.landmarks[i][1]);
    landmark.y = static_cast<int>(
        result.landmarks[i][1]);
    cv::circle(vis_im, landmark, line_size, landmark_color, -1);
  }
  return vis_im;
 }
 }  // namespace vision
-}  // namespace fastdeploy
+}  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/face_detection.cc
+++ b/fastdeploy/vision/visualize/face_detection.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -137,5 +135,3 @@ cv::Mat Visualize::VisFaceDetection(const cv::Mat& im,
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/headpose.cc
+++ b/fastdeploy/vision/visualize/headpose.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -21,8 +19,8 @@ namespace fastdeploy {
 namespace vision {
-cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result,
+cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result, int size,
-                    int size, int line_size) {
+                    int line_size) {
  const float PI = 3.1415926535;
  auto vis_im = im.clone();
  int h = im.rows;
@@ -37,23 +35,28 @@ cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result,
  // X-Axis | drawn in red
  int x1 = static_cast<int>(size * std::cos(yaw) * std::cos(roll)) + tdx;
-  int y1 = static_cast<int>(size * (std::cos(pitch) * std::sin(roll) + 
+  int y1 = static_cast<int>(
-                            std::cos(roll) * std::sin(pitch) * std::sin(yaw))) + tdy;
+               size * (std::cos(pitch) * std::sin(roll) +
                       std::cos(roll) * std::sin(pitch) * std::sin(yaw))) +
           tdy;
  // Y-Axis | drawn in green
  int x2 = static_cast<int>(-size * std::cos(yaw) * std::sin(roll)) + tdx;
-  int y2 = static_cast<int>(size * (std::cos(pitch) * std::cos(roll) - 
+  int y2 = static_cast<int>(
-                            std::sin(pitch) * std::sin(yaw) * std::sin(roll))) + tdy;
+               size * (std::cos(pitch) * std::cos(roll) -
                       std::sin(pitch) * std::sin(yaw) * std::sin(roll))) +
           tdy;
  // Z-Axis | drawn in blue
  int x3 = static_cast<int>(size * std::sin(yaw)) + tdx;
  int y3 = static_cast<int>(-size * std::cos(yaw) * std::sin(pitch)) + tdy;
-  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1), cv::Scalar(0, 0, 255), line_size);
+  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1),
-  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x2, y2), cv::Scalar(0, 255, 0), line_size);
+           cv::Scalar(0, 0, 255), line_size);
-  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3), cv::Scalar(255, 0, 0), line_size);
+  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x2, y2),
           cv::Scalar(0, 255, 0), line_size);
  cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3),
           cv::Scalar(255, 0, 0), line_size);
  return vis_im;
 }
 }  // namespace vision
-}  // namespace fastdeploy
+}  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/keypoint.cc
+++ b/fastdeploy/vision/visualize/keypoint.cc
@@ -11,9 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 namespace fastdeploy {
@@ -29,7 +26,7 @@ cv::Mat VisKeypointDetection(const cv::Mat& im,
  auto colormap = GenerateColorMap();
  cv::Mat vis_img = im.clone();
  int detection_nums = results.keypoints.size() / 17;
-  for (int i = 0; i < detection_nums; i++){
+  for (int i = 0; i < detection_nums; i++) {
    int index = i * 17;
    bool is_over_threshold = true;
    for (int j = 0; j < results.num_joints; j++) {
@@ -43,20 +40,18 @@ cv::Mat VisKeypointDetection(const cv::Mat& im,
        int x_coord = int(results.keypoints[index + k][0]);
        int y_coord = int(results.keypoints[index + k][1]);
        cv::circle(vis_img, cv::Point2d(x_coord, y_coord), 1,
-                  cv::Scalar(0, 0, 255), 2);
+                   cv::Scalar(0, 0, 255), 2);
        int x_start = int(results.keypoints[index + edge[k][0]][0]);
        int y_start = int(results.keypoints[index + edge[k][0]][1]);
        int x_end = int(results.keypoints[index + edge[k][1]][0]);
        int y_end = int(results.keypoints[index + edge[k][1]][1]);
-        cv::line(vis_img, cv::Point2d(x_start, y_start), cv::Point2d(x_end, y_end),
+        cv::line(vis_img, cv::Point2d(x_start, y_start),
-                colormap[k], 1);
+                 cv::Point2d(x_end, y_end), colormap[k], 1);
      }
    }
  }
  return vis_img;
 }
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/matting.cc
+++ b/fastdeploy/vision/visualize/matting.cc
@@ -11,9 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -120,4 +117,3 @@ cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im,
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/mot.cc
+++ b/fastdeploy/vision/visualize/mot.cc
@@ -12,21 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include <iomanip>
 #include "fastdeploy/vision/visualize/visualize.h"
 namespace fastdeploy {
 namespace vision {
 cv::Scalar GetMOTBoxColor(int idx) {
  idx = idx * 3;
-  cv::Scalar color = cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255);
+  cv::Scalar color =
      cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255);
  return color;
 }
 cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results,
-               float score_threshold, tracking::TrailRecorder* recorder) {
+               float score_threshold, tracking::TrailRecorder *recorder) {
  cv::Mat vis_img = img.clone();
  int im_h = img.rows;
  int im_w = img.cols;
@@ -35,56 +36,44 @@ cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results,
  float line_thickness = std::max(1, static_cast<int>(im_w / 500.));
  for (int i = 0; i < results.boxes.size(); ++i) {
    if (results.scores[i] < score_threshold) {
-        continue;
+      continue;
    }
    const int obj_id = results.ids[i];
    const float score = results.scores[i];
    cv::Scalar color = GetMOTBoxColor(obj_id);
-    if (recorder != nullptr){
+    if (recorder != nullptr) {
      int id = results.ids[i];
      auto iter = recorder->records.find(id);
      if (iter != recorder->records.end()) {
        for (int j = 0; j < iter->second.size(); j++) {
-            cv::Point center(iter->second[j][0], iter->second[j][1]);
+          cv::Point center(iter->second[j][0], iter->second[j][1]);
-            cv::circle(vis_img, center, text_thickness, color);
+          cv::circle(vis_img, center, text_thickness, color);
        }
      }
    }
    cv::Point pt1 = cv::Point(results.boxes[i][0], results.boxes[i][1]);
    cv::Point pt2 = cv::Point(results.boxes[i][2], results.boxes[i][3]);
-    cv::Point id_pt =
+    cv::Point id_pt = cv::Point(results.boxes[i][0], results.boxes[i][1] + 10);
            cv::Point(results.boxes[i][0], results.boxes[i][1] + 10);
    cv::Point score_pt =
-            cv::Point(results.boxes[i][0], results.boxes[i][1] - 10);
+        cv::Point(results.boxes[i][0], results.boxes[i][1] - 10);
    cv::rectangle(vis_img, pt1, pt2, color, line_thickness);
    std::ostringstream idoss;
    idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
    idoss << obj_id;
    std::string id_text = idoss.str();
-    cv::putText(vis_img,
+    cv::putText(vis_img, id_text, id_pt, cv::FONT_HERSHEY_PLAIN, text_scale,
-                id_text,
+                color, text_thickness);
                id_pt,
                cv::FONT_HERSHEY_PLAIN,
                text_scale,
                color,
                text_thickness);
    std::ostringstream soss;
    soss << std::setiosflags(std::ios::fixed) << std::setprecision(2);
    soss << score;
    std::string score_text = soss.str();
-    cv::putText(vis_img,
+    cv::putText(vis_img, score_text, score_pt, cv::FONT_HERSHEY_PLAIN,
-                score_text,
+                text_scale, color, text_thickness);
                score_pt,
                cv::FONT_HERSHEY_PLAIN,
                text_scale,
                color,
                text_thickness);
  }
  return vis_img;
 }
-}// namespace vision
+}  // namespace vision
-} //namespace fastdepoly
+}  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/ocr.cc
+++ b/fastdeploy/vision/visualize/ocr.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 namespace fastdeploy {
@@ -63,4 +61,3 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/remove_small_connnected_area.cc
+++ b/fastdeploy/vision/visualize/remove_small_connnected_area.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
@@ -110,4 +108,3 @@ cv::Mat Visualize::RemoveSmallConnectedArea(const cv::Mat& alpha_pred,
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/segmentation.cc
+++ b/fastdeploy/vision/visualize/segmentation.cc
@@ -12,19 +12,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "fastdeploy/vision/visualize/segmentation_arm.h"
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 namespace fastdeploy {
 namespace vision {
-static cv::Mat VisSegmentationCommonCpu(
+static cv::Mat VisSegmentationCommonCpu(const cv::Mat& im,
-  const cv::Mat& im, const SegmentationResult& result,
+                                        const SegmentationResult& result,
-  float weight) {
+                                        float weight) {
  // Use the native c++ version without any optimization.
  auto color_map = GenerateColorMap(1000);
  int64_t height = result.shape[0];
@@ -52,12 +50,12 @@ static cv::Mat VisSegmentationCommonCpu(
 cv::Mat VisSegmentation(const cv::Mat& im, const SegmentationResult& result,
                        float weight) {
-  // TODO: Support SSE/AVX on x86_64 platforms                        
+  // TODO: Support SSE/AVX on x86_64 platforms
-#ifdef __ARM_NEON 
+#ifdef __ARM_NEON
  return VisSegmentationNEON(im, result, weight, true);
-#else  
+#else
  return VisSegmentationCommonCpu(im, result, weight);
-#endif  
+#endif
 }
 cv::Mat Visualize::VisSegmentation(const cv::Mat& im,
@@ -65,14 +63,13 @@ cv::Mat Visualize::VisSegmentation(const cv::Mat& im,
  FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisSegmentation is "
               "deprecated, please use fastdeploy::vision:VisSegmentation "
               "function instead."
-            << std::endl;     
+            << std::endl;
-#ifdef __ARM_NEON 
+#ifdef __ARM_NEON
  return VisSegmentationNEON(im, result, 0.5f, true);
-#else  
+#else
  return VisSegmentationCommonCpu(im, result, 0.5f);
-#endif  
+#endif
 }
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/segmentation_arm.cc
+++ b/fastdeploy/vision/visualize/segmentation_arm.cc
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/segmentation_arm.h"
 #ifdef __ARM_NEON
 #include <arm_neon.h>
@@ -24,8 +22,9 @@ namespace vision {
 static constexpr int _OMP_THREADS = 2;
-static inline void QuantizeBlendingWeight8(
+static inline void QuantizeBlendingWeight8(float weight,
-  float weight, uint8_t* old_multi_factor, uint8_t* new_multi_factor) {
+                                           uint8_t* old_multi_factor,
                                           uint8_t* new_multi_factor) {
  // Quantize the weight to boost blending performance.
  // if 0.0 < w <= 1/8, w ~ 1/8=1/(2^3) shift right 3 mul 1, 7
  // if 1/8 < w <= 2/8, w ~ 2/8=1/(2^3) shift right 3 mul 2, 6
@@ -39,34 +38,34 @@ static inline void QuantizeBlendingWeight8(
  *old_multi_factor = (8 - weight_quantize);
 }
-cv::Mat VisSegmentationNEON(
+cv::Mat VisSegmentationNEON(const cv::Mat& im, const SegmentationResult& result,
-  const cv::Mat& im, const SegmentationResult& result,
+                            float weight, bool quantize_weight) {
-  float weight, bool quantize_weight) {
+#ifndef __ARM_NEON
-#ifndef __ARM_NEON  
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
   FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
 #else
  int64_t height = result.shape[0];
  int64_t width = result.shape[1];
  auto vis_img = cv::Mat(height, width, CV_8UC3);
-  
+
  int32_t size = static_cast<int32_t>(height * width);
-  uint8_t *vis_ptr = static_cast<uint8_t*>(vis_img.data);
+  uint8_t* vis_ptr = static_cast<uint8_t*>(vis_img.data);
-  const uint8_t *label_ptr = static_cast<const uint8_t*>(result.label_map.data());
+  const uint8_t* label_ptr =
-  const uint8_t *im_ptr = static_cast<const uint8_t*>(im.data);
+      static_cast<const uint8_t*>(result.label_map.data());
  const uint8_t* im_ptr = static_cast<const uint8_t*>(im.data);
  if (!quantize_weight) {
    uint8x16_t zerox16 = vdupq_n_u8(0);
-    #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
    for (int i = 0; i < size - 15; i += 16) {
      uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3);  // 48 bytes
-      uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
+      uint8x16_t labelx16 = vld1q_u8(label_ptr + i);     // 16 bytes
      uint8x16_t ibx16 = bgrx16x3.val[0];
      uint8x16_t igx16 = bgrx16x3.val[1];
      uint8x16_t irx16 = bgrx16x3.val[2];
      // e.g 0b00000001 << 7 -> 0b10000000 128;
-      uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); 
+      uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
-      uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); 
+      uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
-      uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); 
+      uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
      uint8x16x3_t vbgrx16x3;
      // Keep the pixels of input im if mask = 0
      uint8x16_t cezx16 = vceqq_u8(labelx16, zerox16);
@@ -77,35 +76,34 @@ cv::Mat VisSegmentationNEON(
    }
    for (int i = size - 15; i < size; i++) {
      uint8_t label = label_ptr[i];
-      vis_ptr[i * 3 + 0] = (label << 7); 
+      vis_ptr[i * 3 + 0] = (label << 7);
-      vis_ptr[i * 3 + 1] = (label << 4); 
+      vis_ptr[i * 3 + 1] = (label << 4);
-      vis_ptr[i * 3 + 2] = (label << 3); 
+      vis_ptr[i * 3 + 2] = (label << 3);
    }
    // Blend the colors use OpenCV
    cv::addWeighted(im, 1.0 - weight, vis_img, weight, 0, vis_img);
    return vis_img;
  }
-  
+
  // Quantize the weight to boost blending performance.
  // After that, we can directly use shift instructions
-  // to blend the colors from input im and mask. Please 
+  // to blend the colors from input im and mask. Please
  // check QuantizeBlendingWeight8 for more details.
  uint8_t old_multi_factor, new_multi_factor;
-  QuantizeBlendingWeight8(weight, &old_multi_factor,
+  QuantizeBlendingWeight8(weight, &old_multi_factor, &new_multi_factor);
                          &new_multi_factor);     
  if (new_multi_factor == 0) {
-    return im; // Only keep origin image.
+    return im;  // Only keep origin image.
-  }                                            
+  }
-  
+
  if (new_multi_factor == 8) {
-    // Only keep mask, no need to blending with origin image.
+// Only keep mask, no need to blending with origin image.
-    #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
    for (int i = 0; i < size - 15; i += 16) {
-      uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
+      uint8x16_t labelx16 = vld1q_u8(label_ptr + i);  // 16 bytes
      // e.g 0b00000001 << 7 -> 0b10000000 128;
-      uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); 
+      uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
-      uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); 
+      uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
-      uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); 
+      uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
      uint8x16x3_t vbgr16x3;
      vbgr16x3.val[0] = mbx16;
      vbgr16x3.val[1] = mgx16;
@@ -114,36 +112,36 @@ cv::Mat VisSegmentationNEON(
    }
    for (int i = size - 15; i < size; i++) {
      uint8_t label = label_ptr[i];
-      vis_ptr[i * 3 + 0] = (label << 7); 
+      vis_ptr[i * 3 + 0] = (label << 7);
-      vis_ptr[i * 3 + 1] = (label << 4); 
+      vis_ptr[i * 3 + 1] = (label << 4);
-      vis_ptr[i * 3 + 2] = (label << 3); 
+      vis_ptr[i * 3 + 2] = (label << 3);
-    }  
+    }
-    return vis_img;   
+    return vis_img;
  }
-  
+
  uint8x16_t zerox16 = vdupq_n_u8(0);
  uint8x16_t old_fx16 = vdupq_n_u8(old_multi_factor);
  uint8x16_t new_fx16 = vdupq_n_u8(new_multi_factor);
-  // Blend the two colors together with quantize 'weight'.
+// Blend the two colors together with quantize 'weight'.
-  #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
  for (int i = 0; i < size - 15; i += 16) {
    uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3);  // 48 bytes
-    uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
+    uint8x16_t labelx16 = vld1q_u8(label_ptr + i);     // 16 bytes
    uint8x16_t ibx16 = bgrx16x3.val[0];
    uint8x16_t igx16 = bgrx16x3.val[1];
    uint8x16_t irx16 = bgrx16x3.val[2];
    // e.g 0b00000001 << 7 -> 0b10000000 128;
-    uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); 
+    uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
-    uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); 
+    uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
-    uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); 
+    uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
    // Moving 7 bits to the right tends to result in zero,
-    // So, We choose to shift 3 bits to get an approximation 
+    // So, We choose to shift 3 bits to get an approximation
    uint8x16_t ibx16_mshr = vmulq_u8(vshrq_n_u8(ibx16, 3), old_fx16);
-    uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16);   
+    uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16);
    uint8x16_t irx16_mshr = vmulq_u8(vshrq_n_u8(irx16, 3), old_fx16);
    uint8x16_t mbx16_mshr = vmulq_u8(vshrq_n_u8(mbx16, 3), new_fx16);
    uint8x16_t mgx16_mshr = vmulq_u8(vshrq_n_u8(mgx16, 3), new_fx16);
-    uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16);  
+    uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16);
    uint8x16_t qbx16 = vqaddq_u8(ibx16_mshr, mbx16_mshr);
    uint8x16_t qgx16 = vqaddq_u8(igx16_mshr, mgx16_mshr);
    uint8x16_t qrx16 = vqaddq_u8(irx16_mshr, mrx16_mshr);
@@ -152,10 +150,10 @@ cv::Mat VisSegmentationNEON(
    uint8x16_t abx16 = vandq_u8(cezx16, ibx16);
    uint8x16_t agx16 = vandq_u8(cezx16, igx16);
    uint8x16_t arx16 = vandq_u8(cezx16, irx16);
-    uint8x16x3_t vbgr16x3;  
+    uint8x16x3_t vbgr16x3;
-    // Reset qx values to 0 if label is 0, then, keep mask values 
+    // Reset qx values to 0 if label is 0, then, keep mask values
-    // if label is not 0  
+    // if label is not 0
-    uint8x16_t ncezx16 = vmvnq_u8(cezx16); 
+    uint8x16_t ncezx16 = vmvnq_u8(cezx16);
    vbgr16x3.val[0] = vorrq_u8(abx16, vandq_u8(ncezx16, qbx16));
    vbgr16x3.val[1] = vorrq_u8(agx16, vandq_u8(ncezx16, qgx16));
    vbgr16x3.val[2] = vorrq_u8(arx16, vandq_u8(ncezx16, qrx16));
@@ -164,18 +162,16 @@ cv::Mat VisSegmentationNEON(
  }
  for (int i = size - 15; i < size; i++) {
    uint8_t label = label_ptr[i];
-    vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor 
+    vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor +
-      + ((label << 7) >> 3) * new_multi_factor; 
+                         ((label << 7) >> 3) * new_multi_factor;
-    vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor 
+    vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor +
-      + ((label << 4) >> 3) * new_multi_factor; 
+                         ((label << 4) >> 3) * new_multi_factor;
-    vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor 
+    vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor +
-      + ((label << 3) >> 3) * new_multi_factor;   
+                         ((label << 3) >> 3) * new_multi_factor;
-  }  
+  }
  return vis_img;
-#endif  
+#endif
 }
 }  // namespace vision
-}  // namespace fastdeploy
+}  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/segmentation_arm.h
+++ b/fastdeploy/vision/visualize/segmentation_arm.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #pragma once
 #include "fastdeploy/vision/common/result.h"
@@ -26,6 +25,3 @@ cv::Mat VisSegmentationNEON(const cv::Mat& im, const SegmentationResult& result,
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/swap_background.cc
+++ b/fastdeploy/vision/visualize/swap_background.cc
@@ -12,20 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifdef ENABLE_VISION_VISUALIZE
+#include "fastdeploy/utils/utils.h"
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "fastdeploy/vision/visualize/swap_background_arm.h"
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "fastdeploy/utils/utils.h"
 namespace fastdeploy {
 namespace vision {
-static cv::Mat SwapBackgroundCommonCpu(
+static cv::Mat SwapBackgroundCommonCpu(const cv::Mat& im,
-  const cv::Mat& im, const cv::Mat& background,
+                                       const cv::Mat& background,
-  const MattingResult& result, bool remove_small_connected_area) {
+                                       const MattingResult& result,
                                       bool remove_small_connected_area) {
  FDASSERT((!im.empty()), "Image can't be empty!");
  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
  FDASSERT((!background.empty()), "Background image can't be empty!");
@@ -77,9 +76,10 @@ static cv::Mat SwapBackgroundCommonCpu(
  return vis_img;
 }
-static cv::Mat SwapBackgroundCommonCpu(
+static cv::Mat SwapBackgroundCommonCpu(const cv::Mat& im,
-  const cv::Mat& im, const cv::Mat& background,
+                                       const cv::Mat& background,
-  const SegmentationResult& result, int background_label) {
+                                       const SegmentationResult& result,
                                       int background_label) {
  FDASSERT((!im.empty()), "Image can't be empty!");
  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
  FDASSERT((!background.empty()), "Background image can't be empty!");
@@ -129,25 +129,25 @@ static cv::Mat SwapBackgroundCommonCpu(
 cv::Mat SwapBackground(const cv::Mat& im, const cv::Mat& background,
                       const MattingResult& result,
                       bool remove_small_connected_area) {
-  // TODO: Support SSE/AVX on x86_64 platforms                        
+  // TODO: Support SSE/AVX on x86_64 platforms
-#ifdef __ARM_NEON 
+#ifdef __ARM_NEON
-  return SwapBackgroundNEON(im, background, result, 
+  return SwapBackgroundNEON(im, background, result,
-                            remove_small_connected_area);                       
+                            remove_small_connected_area);
-#else  
+#else
-  return SwapBackgroundCommonCpu(im, background, result, 
+  return SwapBackgroundCommonCpu(im, background, result,
-                                 remove_small_connected_area);                          
+                                 remove_small_connected_area);
-#endif    
+#endif
 }
 cv::Mat SwapBackground(const cv::Mat& im, const cv::Mat& background,
                       const SegmentationResult& result, int background_label) {
-  // TODO: Support SSE/AVX on x86_64 platforms                        
+  // TODO: Support SSE/AVX on x86_64 platforms
-#ifdef __ARM_NEON 
+#ifdef __ARM_NEON
  // return SwapBackgroundNEON(im, background, result, background_label);
  return SwapBackgroundNEON(im, background, result, background_label);
-#else  
+#else
  return SwapBackgroundCommonCpu(im, background, result, background_label);
-#endif    
+#endif
 }
 // DEPRECATED
@@ -155,27 +155,26 @@ cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat& im,
                                         const cv::Mat& background,
                                         const MattingResult& result,
                                         bool remove_small_connected_area) {
-// TODO: Support SSE/AVX on x86_64 platforms                        
+// TODO: Support SSE/AVX on x86_64 platforms
-#ifdef __ARM_NEON 
+#ifdef __ARM_NEON
-  return SwapBackgroundNEON(im, background, result, 
+  return SwapBackgroundNEON(im, background, result,
                            remove_small_connected_area);
-#else  
+#else
-  return SwapBackgroundCommonCpu(im, background, result, 
+  return SwapBackgroundCommonCpu(im, background, result,
                                 remove_small_connected_area);
-#endif                                              
+#endif
 }
 cv::Mat Visualize::SwapBackgroundSegmentation(
    const cv::Mat& im, const cv::Mat& background, int background_label,
    const SegmentationResult& result) {
-  // TODO: Support SSE/AVX on x86_64 platforms                        
+  // TODO: Support SSE/AVX on x86_64 platforms
-#ifdef __ARM_NEON 
+#ifdef __ARM_NEON
  return SwapBackgroundNEON(im, background, result, background_label);
-#else  
+#else
  return SwapBackgroundCommonCpu(im, background, result, background_label);
-#endif    
+#endif
 }
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/swap_background_arm.cc
+++ b/fastdeploy/vision/visualize/swap_background_arm.cc
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "fastdeploy/vision/visualize/swap_background_arm.h"
 #include "fastdeploy/vision/visualize/visualize.h"
 #ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif
@@ -25,209 +25,214 @@ namespace vision {
 static constexpr int _OMP_THREADS = 2;
-cv::Mat SwapBackgroundNEON(const cv::Mat& im, 
+cv::Mat SwapBackgroundNEON(const cv::Mat& im, const cv::Mat& background,
-                           const cv::Mat& background, 
+                           const MattingResult& result,
                           const MattingResult& result, 
                           bool remove_small_connected_area) {
-#ifndef __ARM_NEON  
+#ifndef __ARM_NEON
-   FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
 #else
-   FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((!im.empty()), "Image can't be empty!");
-   FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
-   FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
-   FDASSERT((background.channels() == 3),
+  FDASSERT((background.channels() == 3),
-            "Only support 3 channels background image mat!");
+           "Only support 3 channels background image mat!");
-   int out_h = static_cast<int>(result.shape[0]);
+  int out_h = static_cast<int>(result.shape[0]);
-   int out_w = static_cast<int>(result.shape[1]);
+  int out_w = static_cast<int>(result.shape[1]);
-   int height = im.rows;
+  int height = im.rows;
-   int width = im.cols;
+  int width = im.cols;
-   int bg_height = background.rows;
+  int bg_height = background.rows;
-   int bg_width = background.cols;
+  int bg_width = background.cols;
   // WARN: may change the original alpha
   float* alpha_ptr = const_cast<float*>(result.alpha.data());
-   cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
+  // WARN: may change the original alpha
-   if (remove_small_connected_area) {
+  float* alpha_ptr = const_cast<float*>(result.alpha.data());
      alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f);
   }
   auto vis_img = cv::Mat(height, width, CV_8UC3);  
   cv::Mat background_ref;
   if ((bg_height != height) || (bg_width != width)) {
      cv::resize(background, background_ref, cv::Size(width, height));
   } else {
      background_ref = background; // ref only
   }
   if ((background_ref).type() != CV_8UC3) {
      (background_ref).convertTo((background_ref), CV_8UC3);
   }
-   if ((out_h != height) || (out_w != width)) {
+  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
-      cv::resize(alpha, alpha, cv::Size(width, height));
+  if (remove_small_connected_area) {
-   }
+    alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f);
  }
  auto vis_img = cv::Mat(height, width, CV_8UC3);
-   uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
+  cv::Mat background_ref;
-   const uint8_t* background_data = static_cast<const uint8_t*>(background_ref.data);
+  if ((bg_height != height) || (bg_width != width)) {
-   const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
+    cv::resize(background, background_ref, cv::Size(width, height));
-   const float* alpha_data = reinterpret_cast<const float*>(alpha.data);
+  } else {
    background_ref = background;  // ref only
  }
  if ((background_ref).type() != CV_8UC3) {
    (background_ref).convertTo((background_ref), CV_8UC3);
  }
-   const int32_t size = static_cast<int32_t>(height * width);
+  if ((out_h != height) || (out_w != width)) {
-   #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+    cv::resize(alpha, alpha, cv::Size(width, height));
-   for(int i = 0; i < size - 7; i += 8) {
+  }
      uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3);  // 24 bytes
      // u8 -> u16 -> u32 -> f32
      uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]);
      uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]);
      uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]);
      uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3);  // 24 bytes
      uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]);
      uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]);
      uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]);
-      uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8));
+  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
-      uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8));
+  const uint8_t* background_data =
-      uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8));
+      static_cast<const uint8_t*>(background_ref.data);
-      uint32x4_t libx4 = vmovl_u16(vget_low_u16(ibx8));
+  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
-      uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8));
+  const float* alpha_data = reinterpret_cast<const float*>(alpha.data);
      uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8));
-      uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8));
+  const int32_t size = static_cast<int32_t>(height * width);
-      uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8));
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
-      uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8));
+  for (int i = 0; i < size - 7; i += 8) {
-      uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8));
+    uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3);  // 24 bytes
-      uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8));
+    // u8 -> u16 -> u32 -> f32
-      uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8));
+    uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]);
    uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]);
    uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]);
    uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3);  // 24 bytes
    uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]);
    uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]);
    uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]);
-      float32x4_t fhibx4 = vcvtq_f32_u32(hibx4);
+    uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8));
-      float32x4_t fhigx4 = vcvtq_f32_u32(higx4);
+    uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8));
-      float32x4_t fhirx4 = vcvtq_f32_u32(hirx4);
+    uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8));
-      float32x4_t flibx4 = vcvtq_f32_u32(libx4);
+    uint32x4_t libx4 = vmovl_u16(vget_low_u16(ibx8));
-      float32x4_t fligx4 = vcvtq_f32_u32(ligx4);
+    uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8));
-      float32x4_t flirx4 = vcvtq_f32_u32(lirx4);
+    uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8));
-      float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4);
+    uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8));
-      float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4);
+    uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8));
-      float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4);
+    uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8));
-      float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4);
+    uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8));
-      float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4);
+    uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8));
-      float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4);
+    uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8));
      // alpha load from little end
      float32x4_t lalpx4 = vld1q_f32(alpha_data + i); // low bits
      float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4); // high bits
      float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4);
      float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4);
-      // blending 
+    float32x4_t fhibx4 = vcvtq_f32_u32(hibx4);
-      float32x4_t fhvbx4 = vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4));
+    float32x4_t fhigx4 = vcvtq_f32_u32(higx4);
-      float32x4_t fhvgx4 = vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4));
+    float32x4_t fhirx4 = vcvtq_f32_u32(hirx4);
-      float32x4_t fhvrx4 = vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4));
+    float32x4_t flibx4 = vcvtq_f32_u32(libx4);
-      float32x4_t flvbx4 = vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4));
+    float32x4_t fligx4 = vcvtq_f32_u32(ligx4);
-      float32x4_t flvgx4 = vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4));
+    float32x4_t flirx4 = vcvtq_f32_u32(lirx4);
      float32x4_t flvrx4 = vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4));
-      // f32 -> u32 -> u16 -> u8
+    float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4);
-      uint8x8x3_t vbgrx8x3;
+    float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4);
-      // combine low 64 bits and high 64 bits into one 128 neon register
+    float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4);
-      vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)), 
+    float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4);
-                                               vmovn_u32(vcvtq_u32_f32(fhvbx4))));
+    float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4);
-      vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)), 
+    float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4);
                                               vmovn_u32(vcvtq_u32_f32(fhvgx4))));
      vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)), 
                                               vmovn_u32(vcvtq_u32_f32(fhvrx4))));                                         
      vst3_u8(vis_data + i * 3, vbgrx8x3);
   }
-   for (int i = size - 7; i < size; i++) {
+    // alpha load from little end
-      float alp = alpha_data[i];
+    float32x4_t lalpx4 = vld1q_f32(alpha_data + i);      // low bits
-      for (int c = 0; c < 3; ++c) {
+    float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4);  // high bits
-         vis_data[i * 3 + 0] = cv::saturate_cast<uchar>(
+    float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4);
-            static_cast<float>(im_data[i * 3 + c]) * alp  + (1.0f - alp) 
+    float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4);
            * static_cast<float>(background_data[i * 3 + c]));
      }
   }
-   return vis_img;
+    // blending
    float32x4_t fhvbx4 =
        vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4));
    float32x4_t fhvgx4 =
        vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4));
    float32x4_t fhvrx4 =
        vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4));
    float32x4_t flvbx4 =
        vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4));
    float32x4_t flvgx4 =
        vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4));
    float32x4_t flvrx4 =
        vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4));
    // f32 -> u32 -> u16 -> u8
    uint8x8x3_t vbgrx8x3;
    // combine low 64 bits and high 64 bits into one 128 neon register
    vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)),
                                             vmovn_u32(vcvtq_u32_f32(fhvbx4))));
    vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)),
                                             vmovn_u32(vcvtq_u32_f32(fhvgx4))));
    vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)),
                                             vmovn_u32(vcvtq_u32_f32(fhvrx4))));
    vst3_u8(vis_data + i * 3, vbgrx8x3);
  }
  // Scalar tail: blend the trailing pixels one at a time using the same
  // formula as the vector loop (im * alpha + background * (1 - alpha)).
  // The start index is size - 7 as before, but clamped to 0 so that inputs
  // with fewer than 7 pixels never index before the start of the buffers.
  for (int i = (size > 7 ? size - 7 : 0); i < size; i++) {
    float alp = alpha_data[i];
    for (int c = 0; c < 3; ++c) {
      // Write channel c of the output pixel; the previous code always wrote
      // channel 0, leaving channels 1 and 2 of the tail pixels unset.
      vis_data[i * 3 + c] = cv::saturate_cast<uchar>(
          static_cast<float>(im_data[i * 3 + c]) * alp +
          (1.0f - alp) * static_cast<float>(background_data[i * 3 + c]));
    }
  }
  return vis_img;
 #endif
 }
-cv::Mat SwapBackgroundNEON(const cv::Mat& im,
+cv::Mat SwapBackgroundNEON(const cv::Mat& im, const cv::Mat& background,
                           const cv::Mat& background,
                           const SegmentationResult& result,
                           int background_label) {
-#ifndef __ARM_NEON  
+#ifndef __ARM_NEON
-   FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
 #else
-   FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((!im.empty()), "Image can't be empty!");
-   FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
-   FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
-   FDASSERT((background.channels() == 3),
+  FDASSERT((background.channels() == 3),
-            "Only support 3 channels background image mat!");
+           "Only support 3 channels background image mat!");
-   int out_h = static_cast<int>(result.shape[0]);
+  int out_h = static_cast<int>(result.shape[0]);
-   int out_w = static_cast<int>(result.shape[1]);
+  int out_w = static_cast<int>(result.shape[1]);
-   int height = im.rows;
+  int height = im.rows;
-   int width = im.cols;
+  int width = im.cols;
-   int bg_height = background.rows;
+  int bg_height = background.rows;
-   int bg_width = background.cols;
+  int bg_width = background.cols;
-   auto vis_img = cv::Mat(height, width, CV_8UC3);  
+  auto vis_img = cv::Mat(height, width, CV_8UC3);
   cv::Mat background_ref;
   if ((bg_height != height) || (bg_width != width)) {
      cv::resize(background, background_ref, cv::Size(width, height));
   } else {
      background_ref = background; // ref only
   }
   if ((background_ref).type() != CV_8UC3) {
      (background_ref).convertTo((background_ref), CV_8UC3);
   }
   uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
   const uint8_t* background_data = static_cast<const uint8_t*>(background_ref.data);
   const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
   const uint8_t *label_data = static_cast<const uint8_t*>(result.label_map.data());
-   const uint8_t background_label_ = static_cast<uint8_t>(background_label);
+  cv::Mat background_ref;
-   const int32_t size = static_cast<int32_t>(height * width);
+  if ((bg_height != height) || (bg_width != width)) {
    cv::resize(background, background_ref, cv::Size(width, height));
  } else {
    background_ref = background;  // ref only
  }
  if ((background_ref).type() != CV_8UC3) {
    (background_ref).convertTo((background_ref), CV_8UC3);
  }
-   uint8x16_t backgroundx16 = vdupq_n_u8(background_label_);
+  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
-   #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+  const uint8_t* background_data =
-   for (int i = 0; i < size - 15; i += 16) {
+      static_cast<const uint8_t*>(background_ref.data);
-      uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3); // 48 bytes
+  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
-      uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3);
+  const uint8_t* label_data =
-      uint8x16_t labelx16 = vld1q_u8(label_data + i); // 16 bytes
+      static_cast<const uint8_t*>(result.label_map.data());
      // Set mask bit = 1 if label != background_label
      uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16);
      uint8x16_t keepx16 = vmvnq_u8(nkeepx16); // keep_value = 1
      uint8x16x3_t vbgr16x3;
      vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16), 
                                 vandq_u8(bbgr16x3.val[0], nkeepx16));
      vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16), 
                                 vandq_u8(bbgr16x3.val[1], nkeepx16));
      vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16), 
                                 vandq_u8(bbgr16x3.val[2], nkeepx16));
      // Store the blended pixels to vis img
      vst3q_u8(vis_data + i * 3, vbgr16x3);
   }
-   for (int i = size - 15; i < size; i++) {
+  const uint8_t background_label_ = static_cast<uint8_t>(background_label);
-      uint8_t label = label_data[i];
+  const int32_t size = static_cast<int32_t>(height * width);
      if (label != background_label_) {
         vis_data[i * 3 + 0] = im_data[i * 3 + 0];
         vis_data[i * 3 + 1] = im_data[i * 3 + 1];
         vis_data[i * 3 + 2] = im_data[i * 3 + 2];
      } else {
         vis_data[i * 3 + 0] = background_data[i * 3 + 0];
         vis_data[i * 3 + 1] = background_data[i * 3 + 1];
         vis_data[i * 3 + 2] = background_data[i * 3 + 2];
      }
   }
-   return vis_img;
+  uint8x16_t backgroundx16 = vdupq_n_u8(background_label_);
 #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
  for (int i = 0; i < size - 15; i += 16) {
    uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3);  // 48 bytes
    uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3);
    uint8x16_t labelx16 = vld1q_u8(label_data + i);  // 16 bytes
    // Set mask bit = 1 if label != background_label
    uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16);
    uint8x16_t keepx16 = vmvnq_u8(nkeepx16);  // keep_value = 1
    uint8x16x3_t vbgr16x3;
    vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16),
                               vandq_u8(bbgr16x3.val[0], nkeepx16));
    vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16),
                               vandq_u8(bbgr16x3.val[1], nkeepx16));
    vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16),
                               vandq_u8(bbgr16x3.val[2], nkeepx16));
    // Store the blended pixels to vis img
    vst3q_u8(vis_data + i * 3, vbgr16x3);
  }
  // Scalar tail: handle the trailing pixels one at a time. A pixel keeps the
  // input image colour unless its label equals background_label_, in which
  // case the background colour is copied instead.
  // The start index is size - 15 as before, but clamped to 0 so that inputs
  // with fewer than 15 pixels never index before the start of the buffers.
  for (int i = (size > 15 ? size - 15 : 0); i < size; i++) {
    uint8_t label = label_data[i];
    if (label != background_label_) {
      vis_data[i * 3 + 0] = im_data[i * 3 + 0];
      vis_data[i * 3 + 1] = im_data[i * 3 + 1];
      vis_data[i * 3 + 2] = im_data[i * 3 + 2];
    } else {
      vis_data[i * 3 + 0] = background_data[i * 3 + 0];
      vis_data[i * 3 + 1] = background_data[i * 3 + 1];
      vis_data[i * 3 + 2] = background_data[i * 3 + 2];
    }
  }
  return vis_img;
 #endif
 }
 }  // namespace vision
-}  // namespace fastdeploy
+}  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/swap_background_arm.h
+++ b/fastdeploy/vision/visualize/swap_background_arm.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #pragma once
 #include "fastdeploy/vision/common/result.h"
@@ -21,18 +20,15 @@
 namespace fastdeploy {
 namespace vision {
-cv::Mat SwapBackgroundNEON(const cv::Mat& im, 
+cv::Mat SwapBackgroundNEON(const cv::Mat& im,
-                           const cv::Mat& background, 
+                           const cv::Mat& background,
-                           const MattingResult& result, 
+                           const MattingResult& result,
                           bool remove_small_connected_area = false);
 cv::Mat SwapBackgroundNEON(const cv::Mat& im,
                           const cv::Mat& background,
                           const SegmentationResult& result,
-                           int background_label);    
+                           int background_label);
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/visualize.cc
+++ b/fastdeploy/vision/visualize/visualize.cc
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 namespace fastdeploy {
@@ -66,4 +65,3 @@ const std::vector<int>& Visualize::GetColorMap(int num_classes) {
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #pragma once
 #include "fastdeploy/vision/common/result.h"
@@ -202,4 +201,3 @@ FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/scripts/android/build_android_cpp.sh
+++ b/scripts/android/build_android_cpp.sh
@@ -88,11 +88,10 @@ __build_fastdeploy_android_shared() {
        -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
        -DENABLE_ORT_BACKEND=OFF \
        -DENABLE_LITE_BACKEND=ON \
-        -DENABLE_PADDLE_FRONTEND=OFF \
+        -DENABLE_PADDLE2ONNX=OFF \
        -DENABLE_FLYCV=ON \
        -DENABLE_TEXT=OFF \
        -DENABLE_VISION=ON \
        -DENABLE_VISION_VISUALIZE=ON \
        -DBUILD_EXAMPLES=ON \
        -DWITH_OPENCV_STATIC=OFF \
        -DWITH_LITE_STATIC=OFF \
--- a/scripts/android/build_android_cpp_with_text_api.sh
+++ b/scripts/android/build_android_cpp_with_text_api.sh
@@ -88,11 +88,10 @@ __build_fastdeploy_android_shared() {
        -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
        -DENABLE_ORT_BACKEND=OFF \
        -DENABLE_LITE_BACKEND=ON \
-        -DENABLE_PADDLE_FRONTEND=OFF \
+        -DENABLE_PADDLE2ONNX=OFF \
        -DENABLE_FLYCV=ON \
        -DENABLE_TEXT=ON \
        -DENABLE_VISION=ON \
        -DENABLE_VISION_VISUALIZE=ON \
        -DBUILD_EXAMPLES=ON \
        -DWITH_OPENCV_STATIC=OFF \
        -DWITH_LITE_STATIC=OFF \
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -13,32 +13,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-ARGS=`getopt -a -o w:n:h:hs -l WITH_GPU:,docker_name:,http_proxy:,https_proxy: -- "$@"` 
+ARGS=`getopt -a -o w:n:h:hs -l WITH_GPU:,docker_name:,http_proxy:,https_proxy: -- "$@"`
-eval set -- "${ARGS}" 
+eval set -- "${ARGS}"
 echo "parse start"
-while true  
+while true
-do  
+do
-        case "$1" in 
+        case "$1" in
-        -w|--WITH_GPU)  
+        -w|--WITH_GPU)
-                WITH_GPU="$2" 
+                WITH_GPU="$2"
                shift;;
-        -n|--docker_name)  
+        -n|--docker_name)
-                docker_name="$2" 
+                docker_name="$2"
                shift;;
-        -h|--http_proxy)  
+        -h|--http_proxy)
-                http_proxy="$2" 
+                http_proxy="$2"
                shift;;
-        -hs|--https_proxy)  
+        -hs|--https_proxy)
-                https_proxy="$2" 
+                https_proxy="$2"
                shift;;
-        --)  
+        --)
                shift
-                break;;  
+                break;;
        esac
 shift
-done 
+done
 if [ -z $WITH_GPU ];then
    WITH_GPU="ON"
@@ -88,7 +88,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
            python setup.py bdist_wheel;
            cd /workspace/fastdeploy;
            rm -rf build; mkdir -p build;cd build;
-            cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+            cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
            make -j`nproc`;
            make install;
            cd /workspace/fastdeploy/serving;
@@ -121,7 +121,7 @@ docker run -i --rm --name ${docker_name} \
            python setup.py bdist_wheel;
            cd /workspace/fastdeploy;
            rm -rf build; mkdir build; cd build;
-            cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+            cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
            make -j`nproc`;
            make install;
            cd /workspace/fastdeploy/serving;