Merge branch 'PaddlePaddle:develop' into develop

Authored by huangjianhui on 2023-02-09 16:42:36 +08:00, committed via GitHub.
232 changed files with 10613 additions and 2396 deletions


@@ -37,7 +37,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
# Set C++11 as standard for the whole project
if(NOT MSVC)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_FLAGS "-Wno-format")
set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
endif(NOT MSVC)
@@ -68,10 +68,12 @@ option(ENABLE_TEXT "Whether to enable text models usage." OFF)
option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
option(ENABLE_CVCUDA "Whether to enable NVIDIA CV-CUDA to boost image preprocess." OFF)
option(ENABLE_ENCRYPTION "Whether to enable ENCRYPTION." OFF)
option(ENABLE_BENCHMARK "Whether to enable Benchmark mode." OFF)
option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF)
option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF)
option(WITH_TESTING "Whether to compile with unittest." OFF)
option(WITH_CAPI "Whether to compile with c api." OFF)
############################# Options for Android cross compiling #########################
if(ANDROID)
@@ -153,6 +155,8 @@ get_osx_architecture()
##################################### Building: FastDeploy C++ SDK #######################################
add_definitions(-DFASTDEPLOY_LIB)
# set CMAKE_BUILD_TYPE to Release
add_definitions(-DCMAKE_BUILD_TYPE=Release)
# configure files before glob sources.
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h)
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc)
@@ -413,6 +417,14 @@ if(ENABLE_PADDLE2ONNX)
list(APPEND DEPEND_LIBS external_paddle2onnx)
endif(ENABLE_PADDLE2ONNX)
if(WITH_CAPI)
include(${PROJECT_SOURCE_DIR}/c_api/CMakeLists.txt)
if(MSVC)
add_definitions(-DFD_CAPI)
endif()
endif()
configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
configure_file(${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py)
configure_file(${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py.in ${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py)
@@ -466,7 +478,7 @@ if(ANDROID)
list(APPEND DEPEND_LIBS ${log-lib})
if(WITH_LITE_STATIC)
# need omp for static Paddle Lite lib
set(WITH_OPENMP ON CACHE BOOL "Force WITH_OPENMP=ON while WITH_LITE_STATIC=ON" FORCE)
message(STATUS "Force WITH_OPENMP=${WITH_OPENMP} while WITH_LITE_STATIC=ON")
endif()
if(WITH_OPENMP)
@@ -482,13 +494,13 @@ if(ANDROID AND WITH_JAVA)
endif()
if(ANDROID AND WITH_STATIC_LIB)
# Here, we use a dummy target (fastdelpoy_dummy)
# to form a build dependency tree for fastdeploy_static lib.
add_library(fastdelpoy_dummy STATIC ${ALL_DEPLOY_SRCS})
# Still add ${DEPEND_LIBS} for cmake to form link_libraries
# property tree for a static library.
target_link_libraries(fastdelpoy_dummy ${DEPEND_LIBS})
# Build fastdelpoy_dummy when the third-party
# libraries (opencv, paddle lite, flycv) are ready.
add_dependencies(fastdelpoy_dummy ${LIBRARY_NAME})
# Add WITH_STATIC_LIB compile definitions, see lite_backend.cc.
@@ -541,9 +553,9 @@ if(WIN32)
RUNTIME DESTINATION lib
)
elseif(ANDROID)
if(WITH_STATIC_LIB)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/libfastdeploy_static.a
DESTINATION lib/${ANDROID_ABI}
)
@@ -553,11 +565,11 @@ elseif(ANDROID)
LIBRARY DESTINATION lib/${ANDROID_ABI}
)
endif()
# Install omp into fastdeploy lib dir if WITH_OPENMP=ON
# and WITH_LITE_STATIC=OFF.
if(WITH_OPENMP AND (NOT WITH_LITE_STATIC) AND OpenMP_CXX_FOUND AND ENABLE_OPENMP_SHARED)
install(
FILES
${OpenMP_CXX_LIBRARIES}
DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/${ANDROID_ABI}
)
@@ -594,7 +606,7 @@ else()
# The headers and libs of opencv must be installed.
if(ENABLE_VISION)
if(WITH_OPENCV_STATIC AND WITH_STATIC_LIB)
# Only need to install headers while building
# FastDeploy static lib. (TODO:qiuyanjun)
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/opencv/sdk/native/jni/include
@@ -606,29 +618,29 @@ else()
DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs/install
)
endif()
endif()
# only need flycv's headers (may also install libs? TODO:qiuyanjun)
if(ENABLE_FLYCV)
if(WITH_FLYCV_STATIC)
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/flycv/include
DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs/install/flycv
)
else()
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/flycv
DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs/install
)
endif()
endif()
endif()
endif(ENABLE_VISION)
# fast_tokenizer's static lib is not available now!
# may be supported some day later (TODO:qiuyanjun)
if(ENABLE_TEXT)
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/fast_tokenizer
DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs/install
)
endif()
endif()
# some libs may not need to be installed in static mode
if(ENABLE_LITE_BACKEND)
if(WITH_LITE_STATIC)


@@ -33,8 +33,8 @@ set(ORT_DIRECTORY "@ORT_DIRECTORY@")
set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@)
# Whether to use FastDeploy static lib. The default
# value for this option is determined by the SDK
# build-time options.
set(WITH_STATIC_LIB @WITH_STATIC_LIB@)
@@ -62,8 +62,8 @@ if(WITH_STATIC_LIB)
# add_definitions(-DWITH_STATIC_WARNING)
endif()
# Still need omp while using FastDeploy static lib.
# This is due to the use of openmp for Paddle Lite's
# static library.
if(ANDROID AND WITH_STATIC_LIB AND WITH_LITE_STATIC)
include(${CMAKE_CURRENT_LIST_DIR}/openmp.cmake)
@@ -72,10 +72,10 @@ endif()
if(ANDROID)
add_library(fastdeploy STATIC IMPORTED GLOBAL)
if(WITH_STATIC_LIB)
set_property(TARGET fastdeploy PROPERTY IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/lib/${ANDROID_ABI}/lib${LIBRARY_NAME}_static.a)
else()
set_property(TARGET fastdeploy PROPERTY IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/lib/${ANDROID_ABI}/lib${LIBRARY_NAME}.so)
endif()
list(APPEND FASTDEPLOY_LIBS fastdeploy)
@@ -226,7 +226,7 @@ if(ENABLE_VISION)
find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR})
list(APPEND FASTDEPLOY_INCS ${OpenCV_INCLUDE_DIRS})
# For now, we still need to link OpenCV static libs.
# Users may use some of opencv's apis, but they may
# not have been compiled into fastdeploy.
# list(APPEND FASTDEPLOY_LIBS ${OpenCV_LIBS})
list(APPEND FASTDEPLOY_LIBS opencv_core opencv_video opencv_highgui opencv_imgproc opencv_imgcodecs)
@@ -264,8 +264,8 @@ if(ENABLE_VISION)
add_library(flycv_shared STATIC IMPORTED GLOBAL)
set_property(TARGET flycv_shared PROPERTY IMPORTED_LOCATION ${FLYCV_LIB_DIR}/${ANDROID_ABI}/libflycv_shared.so)
list(APPEND FASTDEPLOY_LIBS flycv_shared)
else()
# This code may be needed later. Therefore, I choose to
# comment it rather than delete it. (TODO:qiuyanjun)
# add_library(flycv_static STATIC IMPORTED GLOBAL)
# add_library(flycv_png16 STATIC IMPORTED GLOBAL)
@@ -273,25 +273,25 @@ if(ENABLE_VISION)
# add_library(flycv_z STATIC IMPORTED GLOBAL)
# set_property(TARGET flycv_static PROPERTY IMPORTED_LOCATION ${FLYCV_LIB_DIR}/${ANDROID_ABI}/libflycv_static.a)
# set_property(TARGET flycv_png16 PROPERTY IMPORTED_LOCATION ${FLYCV_LIB_DIR}/${ANDROID_ABI}/libpng16.a)
# set_property(TARGET flycv_turbojpeg PROPERTY IMPORTED_LOCATION ${FLYCV_LIB_DIR}/${ANDROID_ABI}/libturbojpeg.a)
# set_property(TARGET flycv_z PROPERTY IMPORTED_LOCATION ${FLYCV_LIB_DIR}/${ANDROID_ABI}/libz.a)
# list(APPEND FASTDEPLOY_LIBS flycv_static)
# list(APPEND FASTDEPLOY_LIBS flycv_png16)
# list(APPEND FASTDEPLOY_LIBS flycv_turbojpeg)
# list(APPEND FASTDEPLOY_LIBS flycv_z)
endif()
else()
find_library(FLYCV_LIB flycv_shared ${FLYCV_LIB_DIR} NO_DEFAULT_PATH)
list(APPEND FASTDEPLOY_LIBS ${FLYCV_LIB})
endif()
endif()
if(ENABLE_CVCUDA)
find_library(CVCUDA_LIB cvcuda ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH)
find_library(NVCV_TYPES_LIB nvcv_types ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH)
list(APPEND FASTDEPLOY_LIBS ${CVCUDA_LIB} ${NVCV_TYPES_LIB})
endif()
endif()
if (ENABLE_TEXT)
@@ -404,7 +404,7 @@ if(ANDROID)
endif()
message(STATUS " WITH_OPENMP: : ${WITH_OPENMP}")
message(STATUS " WITH_JAVA: : ${WITH_JAVA}")
endif()
message(STATUS " DEPENDENCY_LIBS : ${FASTDEPLOY_LIBS}")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
@@ -415,7 +415,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif()
function(install_fastdeploy_libraries DESTINATION_DIR)
# No dynamic libs need to be installed while using
# FastDeploy static lib.
if(WITH_STATIC_LIB)
return()
@@ -442,9 +442,9 @@ function(install_fastdeploy_libraries DESTINATION_DIR)
file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${DYN_LIB_SUFFIX})
else()
file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_DIR}/${DYN_LIB_SUFFIX})
endif()
endif()
list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_OPENCV_DYN_LIBS})
if(NOT WITH_OPENCV_STATIC)
if(WIN32)
file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/x64/vc15/bin/${DYN_LIB_SUFFIX})


@@ -70,7 +70,7 @@
- **WeChat**: Scan the QR code and fill out the questionnaire to join the technical community and discuss deployment pain points in industrial adoption with community developers
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/200145290-d5565d18-6707-4a0b-a9af-85fd36d35d13.jpg" width = "150" height = "150" />
<img src="https://user-images.githubusercontent.com/54695910/216615983-bbb78319-0231-4635-86d1-f2ebf9eac85d.jpg" width = "150" height = "150" />
</div>


@@ -67,7 +67,7 @@ Including [image classification](examples/vision/classification), [object detect
- **WeChat**: Scan the QR code below using WeChat, follow the PaddlePaddle official account, and fill out the questionnaire to join the WeChat group and discuss deployment pain points in industry adoption with community developers
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/207262688-4225bc39-4337-4966-a5cc-26bd6557d226.jpg" width = "150" height = "150" />
<img src="https://user-images.githubusercontent.com/54695910/216615983-bbb78319-0231-4635-86d1-f2ebf9eac85d.jpg" width = "150" height = "150" />
</div>
## 🌌 Inference Backend and Abilities

benchmark/cpp/CMakeLists.txt Executable file

@@ -0,0 +1,20 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# specify the decompress directory of FastDeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/utils/gflags.cmake)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
endif()


@@ -0,0 +1,125 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/benchmark/utils.h"
#include "fastdeploy/vision.h"
#include "flags.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
bool RunModel(std::string model_dir, std::string image_file, size_t warmup,
size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
std::string gpu_mem_file_name) {
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option)) {
PrintUsage();
return false;
}
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
if (FLAGS_profile_mode == "runtime") {
option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
}
auto model = fastdeploy::vision::detection::PaddleYOLOv8(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return false;
}
auto im = cv::imread(image_file);
// For Runtime
if (FLAGS_profile_mode == "runtime") {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
double profile_time = model.GetProfileTime() * 1000;
std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
} else {
// For End2End
// Step1: warm up for warmup times
std::cout << "Warmup " << warmup << " times..." << std::endl;
for (int i = 0; i < warmup; i++) {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
}
std::vector<float> end2end_statis;
// Step2: repeat for repeats times
std::cout << "Counting time..." << std::endl;
fastdeploy::TimeCounter tc;
fastdeploy::vision::DetectionResult res;
for (int i = 0; i < repeats; i++) {
if (FLAGS_collect_memory_info && i % dump_period == 0) {
fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
#if defined(WITH_GPU)
fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
FLAGS_device_id);
#endif
}
tc.Start();
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
tc.End();
end2end_statis.push_back(tc.Duration() * 1000);
}
float end2end = std::accumulate(end2end_statis.end() - repeats,
end2end_statis.end(), 0.f) /
repeats;
std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
return true;
}
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
int repeats = FLAGS_repeat;
int warmup = FLAGS_warmup;
int dump_period = FLAGS_dump_period;
std::string cpu_mem_file_name = "result_cpu.txt";
std::string gpu_mem_file_name = "result_gpu.txt";
// Run model
if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
cpu_mem_file_name, gpu_mem_file_name) != true) {
exit(1);
}
if (FLAGS_collect_memory_info) {
float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
#if defined(WITH_GPU)
float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
#endif
}
return 0;
}


@@ -0,0 +1,114 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/benchmark/utils.h"
#include "fastdeploy/vision.h"
#include "flags.h"
bool RunModel(std::string model_file, std::string image_file, size_t warmup,
size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
std::string gpu_mem_file_name) {
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option)) {
PrintUsage();
return false;
}
if (FLAGS_profile_mode == "runtime") {
option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
}
auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return false;
}
auto im = cv::imread(image_file);
// For Runtime
if (FLAGS_profile_mode == "runtime") {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
double profile_time = model.GetProfileTime() * 1000;
std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
} else {
// For End2End
// Step1: warm up for warmup times
std::cout << "Warmup " << warmup << " times..." << std::endl;
for (int i = 0; i < warmup; i++) {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
}
std::vector<float> end2end_statis;
// Step2: repeat for repeats times
std::cout << "Counting time..." << std::endl;
fastdeploy::TimeCounter tc;
fastdeploy::vision::DetectionResult res;
for (int i = 0; i < repeats; i++) {
if (FLAGS_collect_memory_info && i % dump_period == 0) {
fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
#if defined(WITH_GPU)
fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
FLAGS_device_id);
#endif
}
tc.Start();
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
tc.End();
end2end_statis.push_back(tc.Duration() * 1000);
}
float end2end = std::accumulate(end2end_statis.end() - repeats,
end2end_statis.end(), 0.f) /
repeats;
std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
return true;
}
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
int repeats = FLAGS_repeat;
int warmup = FLAGS_warmup;
int dump_period = FLAGS_dump_period;
std::string cpu_mem_file_name = "result_cpu.txt";
std::string gpu_mem_file_name = "result_gpu.txt";
// Run model
if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
cpu_mem_file_name, gpu_mem_file_name) != true) {
exit(1);
}
if (FLAGS_collect_memory_info) {
float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
#if defined(WITH_GPU)
float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
#endif
}
return 0;
}

benchmark/cpp/flags.h Executable file

@@ -0,0 +1,104 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "gflags/gflags.h"
#include "fastdeploy/utils/perf.h"
DEFINE_string(model, "", "Directory of the inference model.");
DEFINE_string(image, "", "Path of the image file.");
DEFINE_string(device, "cpu",
"Type of inference device, support 'cpu' or 'gpu'.");
DEFINE_int32(device_id, 0, "device(gpu) id.");
DEFINE_int32(warmup, 200, "Number of warmup for profiling.");
DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
DEFINE_string(backend, "default",
"The inference runtime backend, support: ['default', 'ort', "
"'paddle', 'ov', 'trt', 'paddle_trt', 'lite']");
DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
DEFINE_bool(
include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
DEFINE_bool(
use_fp16, false,
"Whether to use FP16 mode, only support 'trt', 'paddle_trt' "
"and 'lite' backend");
DEFINE_bool(
collect_memory_info, false, "Whether to collect memory info");
DEFINE_int32(dump_period, 100, "How often to collect memory info.");
void PrintUsage() {
std::cout << "Usage: infer_demo --model model_path --image img_path --device "
"[cpu|gpu] --backend "
"[default|ort|paddle|ov|trt|paddle_trt] "
"--use_fp16 false"
<< std::endl;
std::cout << "Default value of device: cpu" << std::endl;
std::cout << "Default value of backend: default" << std::endl;
std::cout << "Default value of use_fp16: false" << std::endl;
}
bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
if (FLAGS_device == "gpu") {
option->UseGpu();
if (FLAGS_backend == "ort") {
option->UseOrtBackend();
} else if (FLAGS_backend == "paddle") {
option->UsePaddleInferBackend();
} else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
option->UseTrtBackend();
if (FLAGS_backend == "paddle_trt") {
option->EnablePaddleToTrt();
}
if (FLAGS_use_fp16) {
option->EnableTrtFP16();
}
} else if (FLAGS_backend == "default") {
return true;
} else {
std::cout << "While inference with GPU, only support "
"default/ort/paddle/trt/paddle_trt now, "
<< FLAGS_backend << " is not supported." << std::endl;
return false;
}
} else if (FLAGS_device == "cpu") {
option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
if (FLAGS_backend == "ort") {
option->UseOrtBackend();
} else if (FLAGS_backend == "ov") {
option->UseOpenVINOBackend();
} else if (FLAGS_backend == "paddle") {
option->UsePaddleInferBackend();
} else if (FLAGS_backend == "lite") {
option->UsePaddleLiteBackend();
if (FLAGS_use_fp16) {
option->EnableLiteFP16();
}
} else if (FLAGS_backend == "default") {
return true;
} else {
std::cout << "While inference with CPU, only support "
"default/ort/ov/paddle/lite now, "
<< FLAGS_backend << " is not supported." << std::endl;
return false;
}
} else {
std::cerr << "Only support device CPU/GPU now, " << FLAGS_device
<< " is not supported." << std::endl;
return false;
}
return true;
}


@@ -17,6 +17,7 @@ import cv2
import os
import numpy as np
import time
from tqdm import tqdm
def parse_arguments():
@@ -35,11 +36,22 @@ def parse_arguments():
parser.add_argument(
"--device_id", type=int, default=0, help="device(gpu) id")
parser.add_argument(
"--iter_num",
"--profile_mode",
type=str,
default="runtime",
help="runtime or end2end.")
parser.add_argument(
"--repeat",
required=True,
type=int,
default=300,
help="number of iterations for computing performace.")
default=1000,
help="number of repeats for profiling.")
parser.add_argument(
"--warmup",
required=True,
type=int,
default=50,
help="number of warmup for profiling.")
parser.add_argument(
"--device",
default="cpu",
@@ -59,6 +71,11 @@ def parse_arguments():
type=ast.literal_eval,
default=False,
help="whether enable collect memory info")
parser.add_argument(
"--include_h2d_d2h",
type=ast.literal_eval,
default=False,
help="whether run profiling with h2d and d2h")
args = parser.parse_args()
return args
@@ -68,6 +85,8 @@ def build_option(args):
device = args.device
backend = args.backend
enable_trt_fp16 = args.enable_trt_fp16
if args.profile_mode == "runtime":
option.enable_profiling(args.include_h2d_d2h, args.repeat, args.warmup)
option.set_cpu_thread_num(args.cpu_num_thread)
if device == "gpu":
option.use_gpu()
@@ -229,7 +248,6 @@ if __name__ == '__main__':
gpu_id = args.device_id
enable_collect_memory_info = args.enable_collect_memory_info
dump_result = dict()
end2end_statis = list()
cpu_mem = list()
gpu_mem = list()
gpu_util = list()
@@ -258,18 +276,28 @@ if __name__ == '__main__':
monitor = Monitor(enable_gpu, gpu_id)
monitor.start()
model.enable_record_time_of_runtime()
im_ori = cv2.imread(args.image)
for i in range(args.iter_num):
im = im_ori
if args.profile_mode == "runtime":
result = model.predict(im_ori)
profile_time = model.get_profile_time()
dump_result["runtime"] = profile_time * 1000
f.writelines("Runtime(ms): {} \n".format(
str(dump_result["runtime"])))
print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
else:
# end2end
for i in range(args.warmup):
result = model.predict(im_ori)
start = time.time()
result = model.predict(im)
end2end_statis.append(time.time() - start)
for i in tqdm(range(args.repeat)):
result = model.predict(im_ori)
end = time.time()
dump_result["end2end"] = ((end - start) / args.repeat) * 1000.0
f.writelines("End2End(ms): {} \n".format(
str(dump_result["end2end"])))
print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
runtime_statis = model.print_statis_info_of_runtime()
warmup_iter = args.iter_num // 5
end2end_statis_repeat = end2end_statis[warmup_iter:]
if enable_collect_memory_info:
monitor.stop()
mem_info = monitor.output()
@@ -280,13 +308,6 @@ if __name__ == '__main__':
dump_result["gpu_util"] = mem_info['gpu'][
'utilization.gpu'] if 'gpu' in mem_info else 0
dump_result["runtime"] = runtime_statis["avg_time"] * 1000
dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000
f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"])))
print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
if enable_collect_memory_info:
f.writelines("cpu_rss_mb: {} \n".format(
str(dump_result["cpu_rss_mb"])))
@@ -297,7 +318,8 @@ if __name__ == '__main__':
print("cpu_rss_mb: {} \n".format(str(dump_result["cpu_rss_mb"])))
print("gpu_rss_mb: {} \n".format(str(dump_result["gpu_rss_mb"])))
print("gpu_util: {} \n".format(str(dump_result["gpu_util"])))
except:
except Exception as e:
f.writelines("!!!!!Infer Failed\n")
raise e
f.close()


@@ -17,14 +17,16 @@ import cv2
import os
import numpy as np
import time
from sympy import EX
from tqdm import tqdm
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", required=True, help="Path of PaddleDetection model.")
"--model", required=True, help="Path of PaddleClas model.")
parser.add_argument(
"--image", type=str, required=False, help="Path of test image file.")
parser.add_argument(
@@ -35,20 +37,31 @@ def parse_arguments():
parser.add_argument(
"--device_id", type=int, default=0, help="device(gpu) id")
parser.add_argument(
"--iter_num",
"--profile_mode",
type=str,
default="runtime",
help="runtime or end2end.")
parser.add_argument(
"--repeat",
required=True,
type=int,
default=300,
help="number of iterations for computing performace.")
default=1000,
help="number of repeats for profiling.")
parser.add_argument(
"--warmup",
required=True,
type=int,
default=50,
help="number of warmup for profiling.")
parser.add_argument(
"--device",
default="cpu",
help="Type of inference device, support 'cpu', 'gpu', 'kunlunxin', 'ascend' etc.")
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--backend",
type=str,
default="default",
help="inference backend, default, ort, ov, trt, paddle, paddle_trt, lite.")
help="inference backend, default, ort, ov, trt, paddle, paddle_trt.")
parser.add_argument(
"--enable_trt_fp16",
type=ast.literal_eval,
@@ -58,12 +71,17 @@ def parse_arguments():
"--enable_lite_fp16",
type=ast.literal_eval,
default=False,
help="whether enable fp16 in lite backend")
help="whether enable fp16 in Paddle Lite backend")
parser.add_argument(
"--enable_collect_memory_info",
type=ast.literal_eval,
default=False,
help="whether enable collect memory info")
parser.add_argument(
"--include_h2d_d2h",
type=ast.literal_eval,
default=False,
help="whether run profiling with h2d and d2h")
args = parser.parse_args()
return args
@@ -74,6 +92,8 @@ def build_option(args):
backend = args.backend
enable_trt_fp16 = args.enable_trt_fp16
enable_lite_fp16 = args.enable_lite_fp16
if args.profile_mode == "runtime":
option.enable_profiling(args.include_h2d_d2h, args.repeat, args.warmup)
option.set_cpu_thread_num(args.cpu_num_thread)
if device == "gpu":
option.use_gpu()
@@ -130,7 +150,7 @@ def build_option(args):
else:
raise Exception(
"While inference with CPU, only support default/ort/lite/paddle now, {} is not supported.".
format(backend))
elif device == "ascend":
option.use_ascend()
if backend == "lite":
@@ -142,11 +162,11 @@ def build_option(args):
else:
raise Exception(
"While inference with CPU, only support default/lite now, {} is not supported.".
format(backend))
else:
raise Exception(
"Only support device CPU/GPU/Kunlunxin/Ascend now, {} is not supported.".format(
device))
"Only support device CPU/GPU/Kunlunxin/Ascend now, {} is not supported.".
format(device))
return option
@@ -267,7 +287,6 @@ if __name__ == '__main__':
gpu_id = args.device_id
enable_collect_memory_info = args.enable_collect_memory_info
dump_result = dict()
end2end_statis = list()
cpu_mem = list()
gpu_mem = list()
gpu_util = list()
@@ -317,18 +336,28 @@ if __name__ == '__main__':
monitor = Monitor(enable_gpu, gpu_id)
monitor.start()
model.enable_record_time_of_runtime()
im_ori = cv2.imread(args.image)
for i in tqdm(range(args.iter_num)):
im = im_ori
if args.profile_mode == "runtime":
result = model.predict(im_ori)
profile_time = model.get_profile_time()
dump_result["runtime"] = profile_time * 1000
f.writelines("Runtime(ms): {} \n".format(
str(dump_result["runtime"])))
print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
else:
# end2end
for i in range(args.warmup):
result = model.predict(im_ori)
start = time.time()
result = model.predict(im)
end2end_statis.append(time.time() - start)
for i in tqdm(range(args.repeat)):
result = model.predict(im_ori)
end = time.time()
dump_result["end2end"] = ((end - start) / args.repeat) * 1000.0
f.writelines("End2End(ms): {} \n".format(
str(dump_result["end2end"])))
print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
runtime_statis = model.print_statis_info_of_runtime()
warmup_iter = args.iter_num // 5
end2end_statis_repeat = end2end_statis[warmup_iter:]
if enable_collect_memory_info:
monitor.stop()
mem_info = monitor.output()
@@ -339,13 +368,6 @@ if __name__ == '__main__':
dump_result["gpu_util"] = mem_info['gpu'][
'utilization.gpu'] if 'gpu' in mem_info else 0
dump_result["runtime"] = runtime_statis["avg_time"] * 1000
dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000
f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"])))
print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
if enable_collect_memory_info:
f.writelines("cpu_rss_mb: {} \n".format(
str(dump_result["cpu_rss_mb"])))
@@ -356,7 +378,8 @@ if __name__ == '__main__':
print("cpu_rss_mb: {} \n".format(str(dump_result["cpu_rss_mb"])))
print("gpu_rss_mb: {} \n".format(str(dump_result["gpu_rss_mb"])))
print("gpu_util: {} \n".format(str(dump_result["gpu_util"])))
except:
except Exception as e:
f.writelines("!!!!!Infer Failed\n")
raise e
f.close()

c_api/CMakeLists.txt Normal file

@@ -0,0 +1,28 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##################################### Building: FastDeploy C API #######################################
message("----start--CAPI-------")
if(NOT WITH_CAPI)
return()
endif()
file(GLOB_RECURSE DEPLOY_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/*.cc)
if(NOT ENABLE_VISION)
file(GLOB_RECURSE DEPLOY_VISION_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/vision/*.cc)
list(REMOVE_ITEM DEPLOY_CAPI_SRCS ${DEPLOY_VISION_CAPI_SRCS})
endif()
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_CAPI_SRCS})
include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api)


@@ -0,0 +1,100 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#if defined(_WIN32)
#ifdef FD_CAPI
#define FASTDEPLOY_CAPI_EXPORT __declspec(dllexport)
#else
#define FASTDEPLOY_CAPI_EXPORT __declspec(dllimport)
#endif // FD_CAPI
#else
#define FASTDEPLOY_CAPI_EXPORT __attribute__((visibility("default")))
#endif // _WIN32
///
/// __fd_give means that a new object is returned. The user should make sure
/// that the returned pointer is used exactly once as a value for an __fd_take
/// argument. In between, it can be used as a value for as many __fd_keep
/// arguments as the user likes.
///
#ifndef __fd_give
#define __fd_give
#endif
///
/// __fd_take means that the object the argument points to is taken over by the
/// function and may no longer be used by the user as an argument to any other
/// function. The pointer value must be one returned by a function returning an
/// __fd_give pointer.
///
#ifndef __fd_take
#define __fd_take
#endif
///
/// __fd_keep means that the function will only use the object temporarily. The
/// object which the argument points to is not taken over by the function. After
/// the function has finished, the user can still use it as an argument to other
/// functions.
///
#ifndef __fd_keep
#define __fd_keep
#endif
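//
// For illustration only (not part of this header): how an API reads with these
// annotations. FD_C_Foo and the three functions below are hypothetical names
// used purely as an example.
//
//   __fd_give FD_C_Foo* FD_C_CreateFoo();           // caller receives ownership
//   void FD_C_UseFoo(__fd_keep FD_C_Foo* foo);      // borrowed for this call only
//   void FD_C_DestroyFoo(__fd_take FD_C_Foo* foo);  // ownership handed over and released
//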
typedef int8_t FD_C_Bool;
#define TRUE 1
#define FALSE 0
#define FD_ENUM(type) \
typedef int32_t type; \
enum
FD_ENUM(FD_C_ModelFormat){
AUTOREC, ///< Auto recognize the model format by model file name
PADDLE, ///< Model with paddlepaddle format
ONNX, ///< Model with ONNX format
RKNN, ///< Model with RKNN format
TORCHSCRIPT, ///< Model with TorchScript format
SOPHGO, ///< Model with SOPHGO format
};
FD_ENUM(FD_C_rknpu2_CpuName){
RK356X = 0, /* run on RK356X. */
RK3588 = 1, /* default,run on RK3588. */
UNDEFINED,
};
FD_ENUM(FD_C_rknpu2_CoreMask){
RKNN_NPU_CORE_AUTO = 0, //< default, run on NPU core randomly.
RKNN_NPU_CORE_0 = 1, //< run on NPU core 0.
RKNN_NPU_CORE_1 = 2, //< run on NPU core 1.
RKNN_NPU_CORE_2 = 4, //< run on NPU core 2.
RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 |
RKNN_NPU_CORE_1, //< run on NPU core 0 and core 1.
RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 |
RKNN_NPU_CORE_2, //< run on NPU core 0, core 1 and core 2.
RKNN_NPU_CORE_UNDEFINED,
};
FD_ENUM(FD_C_LitePowerMode){
LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode
LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode
LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode
LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode
LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode
LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode
};


@@ -0,0 +1,67 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include "fastdeploy_capi/fd_common.h" // NOLINT
typedef struct FD_C_OneDimArrayUint8 {
size_t size;
uint8_t* data;
} FD_C_OneDimArrayUint8; // std::vector<uint8_t>
typedef struct FD_C_OneDimArrayInt32 {
size_t size;
int32_t* data;
} FD_C_OneDimArrayInt32; // std::vector<int32_t>
typedef struct FD_C_OneDimArraySize {
size_t size;
size_t* data;
} FD_C_OneDimArraySize; // std::vector<size_t>
typedef struct FD_C_OneDimArrayInt64 {
size_t size;
int64_t* data;
} FD_C_OneDimArrayInt64; // std::vector<int64_t>
typedef struct FD_C_OneDimArrayFloat {
size_t size;
float* data;
} FD_C_OneDimArrayFloat; // std::vector<float>
typedef struct FD_C_Cstr {
size_t size;
char* data;
} FD_C_Cstr; // std::string
typedef struct FD_C_OneDimArrayCstr {
size_t size;
FD_C_Cstr* data;
} FD_C_OneDimArrayCstr; // std::vector<std::string>
typedef struct FD_C_TwoDimArraySize {
size_t size;
FD_C_OneDimArraySize* data;
} FD_C_TwoDimArraySize; // std::vector<std::vector<size_t>>
typedef struct FD_C_TwoDimArrayFloat {
size_t size;
FD_C_OneDimArrayFloat* data;
} FD_C_TwoDimArrayFloat; // std::vector<std::vector<float>>
typedef void* FD_C_Mat;
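// Illustration only (not part of this header): these structs are thin C views
// of the C++ containers noted above. For example, a FD_C_OneDimArrayFloat can
// be filled from a plain C array without taking ownership of the memory.
//
//   float values[3] = {0.1f, 0.2f, 0.3f};
//   FD_C_OneDimArrayFloat arr;
//   arr.size = 3;       // element count, mirrors std::vector<float>::size()
//   arr.data = values;  // borrowed pointer; no allocation is implied here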


@@ -0,0 +1,418 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy_capi/runtime_option.h"
#include "fastdeploy/utils/utils.h"
#include "fastdeploy_capi/types_internal.h"
extern "C" {
FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() {
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper =
new FD_C_RuntimeOptionWrapper();
fd_c_runtime_option_wrapper->runtime_option =
std::unique_ptr<fastdeploy::RuntimeOption>(
new fastdeploy::RuntimeOption());
return fd_c_runtime_option_wrapper;
}
void FD_C_DestroyRuntimeOptionWrapper(
__fd_take FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
delete fd_c_runtime_option_wrapper;
}
void FD_C_RuntimeOptionWrapperSetModelPath(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* model_path, const char* params_path,
const FD_C_ModelFormat format) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetModelPath(std::string(model_path),
std::string(params_path),
static_cast<fastdeploy::ModelFormat>(format));
}
void FD_C_RuntimeOptionWrapperSetModelBuffer(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* model_buffer, const char* params_buffer,
const FD_C_ModelFormat format) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetModelBuffer(model_buffer, params_buffer,
static_cast<fastdeploy::ModelFormat>(format));
}
void FD_C_RuntimeOptionWrapperUseCpu(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseCpu();
}
void FD_C_RuntimeOptionWrapperUseGpu(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int gpu_id) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseGpu(gpu_id);
}
void FD_C_RuntimeOptionWrapperUseRKNPU2(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_rknpu2_CpuName rknpu2_name, FD_C_rknpu2_CoreMask rknpu2_core) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseRKNPU2(
static_cast<fastdeploy::rknpu2::CpuName>(rknpu2_name),
static_cast<fastdeploy::rknpu2::CoreMask>(rknpu2_core));
}
void FD_C_RuntimeOptionWrapperUseTimVX(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseTimVX();
}
void FD_C_RuntimeOptionWrapperUseAscend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseAscend();
}
void FD_C_RuntimeOptionWrapperUseKunlunXin(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
FD_C_Bool autotune, const char* autotune_file, const char* precision,
FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseKunlunXin(kunlunxin_id, l3_workspace_size, bool(locked),
bool(autotune), std::string(autotune_file),
std::string(precision), bool(adaptive_seqlen),
bool(enable_multi_stream));
}
void FD_C_RuntimeOptionWrapperUseSophgo(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseSophgo();
}
void FD_C_RuntimeOptionWrapperSetExternalStream(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
void* external_stream) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetExternalStream(external_stream);
}
void FD_C_RuntimeOptionWrapperSetCpuThreadNum(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int thread_num) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetCpuThreadNum(thread_num);
}
void FD_C_RuntimeOptionWrapperSetOrtGraphOptLevel(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int level) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetOrtGraphOptLevel(level);
}
void FD_C_RuntimeOptionWrapperUsePaddleBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UsePaddleBackend();
}
void FD_C_RuntimeOptionWrapperUsePaddleInferBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
return FD_C_RuntimeOptionWrapperUsePaddleBackend(fd_c_runtime_option_wrapper);
}
void FD_C_RuntimeOptionWrapperUseOrtBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseOrtBackend();
}
void FD_C_RuntimeOptionWrapperUseSophgoBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseSophgoBackend();
}
void FD_C_RuntimeOptionWrapperUseTrtBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseTrtBackend();
}
void FD_C_RuntimeOptionWrapperUsePorosBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UsePorosBackend();
}
void FD_C_RuntimeOptionWrapperUseOpenVINOBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseOpenVINOBackend();
}
void FD_C_RuntimeOptionWrapperUseLiteBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseLiteBackend();
}
void FD_C_RuntimeOptionWrapperUsePaddleLiteBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
return FD_C_RuntimeOptionWrapperUseLiteBackend(fd_c_runtime_option_wrapper);
}
void FD_C_RuntimeOptionWrapperSetPaddleMKLDNN(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_Bool pd_mkldnn) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetPaddleMKLDNN(pd_mkldnn);
}
void FD_C_RuntimeOptionWrapperEnablePaddleToTrt(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->EnablePaddleToTrt();
}
void FD_C_RuntimeOptionWrapperDeletePaddleBackendPass(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* delete_pass_name) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->DeletePaddleBackendPass(std::string(delete_pass_name));
}
void FD_C_RuntimeOptionWrapperEnablePaddleLogInfo(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->EnablePaddleLogInfo();
}
void FD_C_RuntimeOptionWrapperDisablePaddleLogInfo(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->DisablePaddleLogInfo();
}
void FD_C_RuntimeOptionWrapperSetPaddleMKLDNNCacheSize(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int size) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetPaddleMKLDNNCacheSize(size);
}
void FD_C_RuntimeOptionWrapperSetOpenVINODevice(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* name) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetOpenVINODevice(std::string(name));
}
void FD_C_RuntimeOptionWrapperSetLiteOptimizedModelDir(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* optimized_model_dir) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetLiteOptimizedModelDir(std::string(optimized_model_dir));
}
void FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionPath(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_subgraph_partition_config_path) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetLiteSubgraphPartitionPath(
std::string(nnadapter_subgraph_partition_config_path));
}
void FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionConfigBuffer(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_subgraph_partition_config_buffer) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetLiteSubgraphPartitionConfigBuffer(
std::string(nnadapter_subgraph_partition_config_buffer));
}
void FD_C_RuntimeOptionWrapperSetLiteContextProperties(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_context_properties) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetLiteContextProperties(
std::string(nnadapter_context_properties));
}
void FD_C_RuntimeOptionWrapperSetLiteModelCacheDir(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_model_cache_dir) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetLiteModelCacheDir(std::string(nnadapter_model_cache_dir));
}
void FD_C_RuntimeOptionWrapperSetLiteMixedPrecisionQuantizationConfigPath(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_mixed_precision_quantization_config_path) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
  // NOTE: presumably should forward to the corresponding RuntimeOption setter,
  // as the other wrappers do (method name assumed here).
  runtime_option->SetLiteMixedPrecisionQuantizationConfigPath(
      std::string(nnadapter_mixed_precision_quantization_config_path));
}
void FD_C_RuntimeOptionWrapperEnableLiteFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->EnableLiteFP16();
}
void FD_C_RuntimeOptionWrapperDisableLiteFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->DisableLiteFP16();
}
void FD_C_RuntimeOptionWrapperEnableLiteInt8(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->EnableLiteInt8();
}
void FD_C_RuntimeOptionWrapperDisableLiteInt8(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->DisableLiteInt8();
}
void FD_C_RuntimeOptionWrapperSetLitePowerMode(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_LitePowerMode mode) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetLitePowerMode(
static_cast<fastdeploy::LitePowerMode>(mode));
}
void FD_C_RuntimeOptionWrapperEnableTrtFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->EnableTrtFP16();
}
void FD_C_RuntimeOptionWrapperDisableTrtFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->DisableTrtFP16();
}
void FD_C_RuntimeOptionWrapperSetTrtCacheFile(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* cache_file_path) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetTrtCacheFile(std::string(cache_file_path));
}
void FD_C_RuntimeOptionWrapperEnablePinnedMemory(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->EnablePinnedMemory();
}
void FD_C_RuntimeOptionWrapperDisablePinnedMemory(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->DisablePinnedMemory();
}
void FD_C_RuntimeOptionWrapperEnablePaddleTrtCollectShape(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->EnablePaddleTrtCollectShape();
}
void FD_C_RuntimeOptionWrapperDisablePaddleTrtCollectShape(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->DisablePaddleTrtCollectShape();
}
void FD_C_RuntimeOptionWrapperSetOpenVINOStreams(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int num_streams) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetOpenVINOStreams(num_streams);
}
void FD_C_RuntimeOptionWrapperUseIpu(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int device_num, int micro_batch_size, FD_C_Bool enable_pipelining,
int batches_per_step) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->UseIpu(device_num, micro_batch_size, enable_pipelining,
batches_per_step);
}
void FD_C_RuntimeOptionWrapperSetIpuConfig(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion,
FD_C_Bool enable_half_partial) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
runtime_option->SetIpuConfig(enable_fp16, replica_num,
available_memory_proportion,
enable_half_partial);
}
} // extern "C"


@@ -0,0 +1,517 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "fastdeploy_capi/fd_common.h"
typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper;
#ifdef __cplusplus
extern "C" {
#endif
/** \brief Create a new FD_C_RuntimeOptionWrapper object
*
* \return Return a pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_RuntimeOptionWrapper*
FD_C_CreateRuntimeOptionWrapper();
/** \brief Destroy a FD_C_RuntimeOptionWrapper object
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyRuntimeOptionWrapper(
__fd_take FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/** \brief Set path of model file and parameter file
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
 * \param[in] model_path Path of model file, e.g. ResNet50/model.pdmodel for a Paddle format model / ResNet50/model.onnx for an ONNX format model
 * \param[in] params_path Path of parameter file; only used when the model format is Paddle, e.g. ResNet50/model.pdiparams
* \param[in] format Format of the loaded model
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetModelPath(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* model_path, const char* params_path,
const FD_C_ModelFormat format);
/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] model_buffer The memory buffer of model
* \param[in] params_buffer The memory buffer of the combined parameters file
* \param[in] format Format of the loaded model
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetModelBuffer(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* model_buffer, const char* params_buffer,
    const FD_C_ModelFormat format);
/** \brief Use CPU for inference; the runtime runs on CPU by default
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseCpu(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/** \brief Use Nvidia GPU to inference
*
 * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
 * \param[in] gpu_id id of the GPU card to use
 */
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseGpu(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int gpu_id);
/** \brief Use RKNPU2 to inference
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] rknpu2_name CpuName enum value
* \param[in] rknpu2_core CoreMask enum value
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseRKNPU2(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_rknpu2_CpuName rknpu2_name, FD_C_rknpu2_CoreMask rknpu2_core);
/** \brief Use TimVX to inference
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseTimVX(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/** \brief Use Huawei Ascend to inference
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseAscend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
///
/// \brief Turn on KunlunXin XPU.
///
/// \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
/// \param[in] kunlunxin_id the KunlunXin XPU card to use (default is 0).
/// \param[in] l3_workspace_size The size of the video memory allocated by
///            the l3 cache, the maximum is 16M.
/// \param[in] locked Whether the allocated L3 cache can be locked. If false,
/// it means that the L3 cache is not locked, and the allocated L3
/// cache can be shared by multiple models, and multiple models
/// sharing the L3 cache will be executed sequentially on the card.
/// \param[in] autotune Whether to autotune the conv operator in the model. If
/// true, when the conv operator of a certain dimension is executed
/// for the first time, it will automatically search for a better
/// algorithm to improve the performance of subsequent conv operators
/// of the same dimension.
/// \param[in] autotune_file Specify the path of the autotune file. If
/// autotune_file is specified, the algorithm specified in the
/// file will be used and autotune will not be performed again.
/// \param[in] precision Calculation accuracy of multi_encoder
/// \param[in] adaptive_seqlen Is the input of multi_encoder variable length
/// \param[in] enable_multi_stream Whether to enable the multi stream of
/// KunlunXin XPU.
///
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseKunlunXin(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int kunlunxin_id, int l3_workspace_size, FD_C_Bool locked,
FD_C_Bool autotune, const char* autotune_file, const char* precision,
FD_C_Bool adaptive_seqlen, FD_C_Bool enable_multi_stream);
/** \brief Use Sophgo to inference
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseSophgo(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
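/** \brief Set an external raw stream for model inference
 *
 * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
 * \param[in] external_stream pointer to the external stream, e.g. a CUDA stream
 */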
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetExternalStream(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
void* external_stream);
/**
 * @brief Set the number of CPU threads used for inference on CPU; by default it is decided by the backend in use
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] thread_num number of threads
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetCpuThreadNum(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int thread_num);
/**
 * @brief Set the ONNX Runtime graph optimization level; by default it is decided by ONNX Runtime itself
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] level optimization level
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetOrtGraphOptLevel(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int level);
/**
* @brief Set Paddle Inference as inference backend, support CPU/GPU
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUsePaddleBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Wrapper function of UsePaddleBackend()
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperUsePaddleInferBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Set ONNX Runtime as inference backend, support CPU/GPU
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseOrtBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Set SOPHGO Runtime as inference backend, supports Sophgo NPU
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseSophgoBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Set TensorRT as inference backend, only support GPU
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseTrtBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Set Poros backend as inference backend, support CPU/GPU
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUsePorosBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Set OpenVINO as inference backend, only support CPU
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseOpenVINOBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Set Paddle Lite as inference backend, only supports Arm CPU
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseLiteBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Wrapper function of UseLiteBackend()
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperUsePaddleLiteBackend(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Set mkldnn switch while using Paddle Inference as inference backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] pd_mkldnn whether to use mkldnn
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetPaddleMKLDNN(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_Bool pd_mkldnn);
/**
* @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnablePaddleToTrt(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] delete_pass_name pass name
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperDeletePaddleBackendPass(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* delete_pass_name);
/**
 * @brief Enable printing debug information while using Paddle Inference as the inference backend; the backend disables debug information by default
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnablePaddleLogInfo(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Disable printing debug information while using Paddle Inference as the inference backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperDisablePaddleLogInfo(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Set shape cache size while using Paddle Inference with mkldnn; by default it caches all the different shapes
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] size cache size
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperSetPaddleMKLDNNCacheSize(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, int size);
/**
* @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] name device name
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetOpenVINODevice(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* name);
/**
 * @brief Set optimized model dir for Paddle Lite backend.
 *
 * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
 * \param[in] optimized_model_dir optimized model dir
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperSetLiteOptimizedModelDir(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* optimized_model_dir);
/**
* @brief Set subgraph partition path for Paddle Lite backend.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] nnadapter_subgraph_partition_config_path subgraph partition path
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionPath(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_subgraph_partition_config_path);
/**
 * @brief Set subgraph partition config content (buffer) for Paddle Lite backend.
 *
 * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
 * \param[in] nnadapter_subgraph_partition_config_buffer content of the subgraph partition config
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionConfigBuffer(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_subgraph_partition_config_buffer);
/**
* @brief Set context properties for Paddle Lite backend.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] nnadapter_context_properties context properties
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperSetLiteContextProperties(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_context_properties);
/**
* @brief Set model cache dir for Paddle Lite backend.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] nnadapter_model_cache_dir model cache dir
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperSetLiteModelCacheDir(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_model_cache_dir);
/**
* @brief Set mixed precision quantization config path for Paddle Lite backend.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] nnadapter_mixed_precision_quantization_config_path mixed precision quantization config path
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperSetLiteMixedPrecisionQuantizationConfigPath(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* nnadapter_mixed_precision_quantization_config_path);
/**
 * @brief Enable half precision (FP16) inference while using the Paddle Lite backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnableLiteFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Disable half precision, change back to full precision (float32)
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisableLiteFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Enable int8 precision while using the Paddle Lite backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnableLiteInt8(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Disable int8 precision, change back to full precision (float32)
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisableLiteInt8(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Set power mode while using Paddle Lite as inference backend. mode (0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND; 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW); refer to [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] mode power mode
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetLitePowerMode(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_LitePowerMode mode);
/**
 * @brief Enable FP16 inference while using the TensorRT backend. Notice: not all GPU devices support FP16; on devices that do not support FP16, FastDeploy will fall back to FP32 automatically
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnableTrtFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Disable FP16 inference while using TensorRT backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisableTrtFP16(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Set cache file path while using the TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT can take a long time; with this interface the TensorRT engine is saved to `cache_file_path` and loaded directly the next time the code runs
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] cache_file_path cache file path
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetTrtCacheFile(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const char* cache_file_path);
/**
 * @brief Enable pinned memory. Pinned memory can be utilized to speed up data transfer between CPU and GPU. Currently it is only supported in the TRT backend and the Paddle Inference backend.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperEnablePinnedMemory(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
* @brief Disable pinned memory
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperDisablePinnedMemory(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Enable collecting shape information in the Paddle-TRT backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperEnablePaddleTrtCollectShape(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Disable collecting shape information in the Paddle-TRT backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_RuntimeOptionWrapperDisablePaddleTrtCollectShape(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
/**
 * @brief Set the number of streams for the OpenVINO backend
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] num_streams number of streams
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetOpenVINOStreams(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int num_streams);
/**
 * @brief Use Graphcore IPU for inference.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] device_num the number of IPUs.
 * \param[in] micro_batch_size the batch size in the graph, only works when the graph has no batch shape info.
* \param[in] enable_pipelining enable pipelining.
* \param[in] batches_per_step the number of batches per run in pipelining.
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseIpu(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
int device_num, int micro_batch_size, FD_C_Bool enable_pipelining,
int batches_per_step);
/** \brief Set IPU config.
*
* \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
* \param[in] enable_fp16 enable fp16.
 * \param[in] replica_num the number of graph replicas.
 * \param[in] available_memory_proportion the available memory proportion for matmul/conv.
 * \param[in] enable_half_partial enable fp16 partial for matmul, only works with fp16.
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetIpuConfig(
__fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion,
FD_C_Bool enable_half_partial);
#ifdef __cplusplus
} // extern "C"
#endif
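For orientation, a minimal usage sketch of this runtime-option C API follows; it is not part of the diff. It includes the header the way the C sources above do, and it assumes FD_C_ModelFormat_PADDLE is the enum value for Paddle-format models (the enum values are not shown in this diff).
// Minimal sketch: build a RuntimeOption for CPU inference with the Paddle
// Inference backend. FD_C_ModelFormat_PADDLE is an assumed enum value.
#include "fastdeploy_capi/runtime_option.h"

int main(void) {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();

  FD_C_RuntimeOptionWrapperSetModelPath(option, "ResNet50/model.pdmodel",
                                        "ResNet50/model.pdiparams",
                                        FD_C_ModelFormat_PADDLE);
  FD_C_RuntimeOptionWrapperUseCpu(option);
  FD_C_RuntimeOptionWrapperSetCpuThreadNum(option, 8);
  FD_C_RuntimeOptionWrapperUsePaddleInferBackend(option);

  /* ... pass `option` to a model wrapper and run prediction ... */

  FD_C_DestroyRuntimeOptionWrapper(option);
  return 0;
}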

View File

@@ -0,0 +1,63 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy_capi/types_internal.h"
namespace fastdeploy {
#ifdef ENABLE_VISION
std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>&
FD_C_CheckAndConvertPaddleClasModelWrapper(
FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) {
FDASSERT(
fd_c_paddleclas_model_wrapper != nullptr,
"The pointer of fd_c_paddleclas_model_wrapper shouldn't be nullptr.");
return fd_c_paddleclas_model_wrapper->paddleclas_model;
}
std::unique_ptr<fastdeploy::vision::detection::PPYOLOE>&
FD_C_CheckAndConvertPPYOLOEWrapper(FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper) {
FDASSERT(fd_c_ppyoloe_wrapper != nullptr,
"The pointer of fd_c_ppyoloe_wrapper shouldn't be nullptr.");
return fd_c_ppyoloe_wrapper->ppyoloe_model;
}
std::unique_ptr<fastdeploy::vision::ClassifyResult>&
FD_C_CheckAndConvertClassifyResultWrapper(
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
FDASSERT(fd_c_classify_result_wrapper != nullptr,
"The pointer of fd_c_classify_result_wrapper shouldn't be nullptr.");
return fd_c_classify_result_wrapper->classify_result;
}
std::unique_ptr<fastdeploy::vision::DetectionResult>&
FD_C_CheckAndConvertDetectionResultWrapper(
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
FDASSERT(
fd_c_detection_result_wrapper != nullptr,
"The pointer of fd_c_detection_result_wrapper shouldn't be nullptr.");
return fd_c_detection_result_wrapper->detection_result;
}
#endif
std::unique_ptr<fastdeploy::RuntimeOption>&
FD_C_CheckAndConvertRuntimeOptionWrapper(
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
FDASSERT(fd_c_runtime_option_wrapper != nullptr,
"The pointer of fd_c_runtime_option_wrapper shouldn't be nullptr.");
return fd_c_runtime_option_wrapper->runtime_option;
}
} // namespace fastdeploy

View File

@@ -0,0 +1,70 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy_capi/fd_type.h"
#include <memory>
#ifdef ENABLE_VISION
#include "fastdeploy/vision/classification/ppcls/model.h"
#include "fastdeploy/vision/common/result.h"
#include "fastdeploy/vision/detection/ppdet/model.h"
typedef struct FD_C_ClassifyResultWrapper {
std::unique_ptr<fastdeploy::vision::ClassifyResult> classify_result;
} FD_C_ClassifyResultWrapper;
typedef struct FD_C_DetectionResultWrapper {
std::unique_ptr<fastdeploy::vision::DetectionResult> detection_result;
} FD_C_DetectionResultWrapper;
typedef struct FD_C_PaddleClasModelWrapper {
std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>
paddleclas_model;
} FD_C_PaddleClasModelWrapper;
typedef struct FD_C_PPYOLOEWrapper {
std::unique_ptr<fastdeploy::vision::detection::PPYOLOE> ppyoloe_model;
} FD_C_PPYOLOEWrapper;
namespace fastdeploy {
std::unique_ptr<fastdeploy::vision::ClassifyResult>&
FD_C_CheckAndConvertClassifyResultWrapper(
FD_C_ClassifyResultWrapper* fd_classify_result_wrapper);
std::unique_ptr<fastdeploy::vision::DetectionResult>&
FD_C_CheckAndConvertDetectionResultWrapper(
FD_C_DetectionResultWrapper* fd_detection_result_wrapper);
std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>&
FD_C_CheckAndConvertPaddleClasModelWrapper(
FD_C_PaddleClasModelWrapper* fd_paddleclas_model_wrapper);
std::unique_ptr<fastdeploy::vision::detection::PPYOLOE>&
FD_C_CheckAndConvertPPYOLOEWrapper(FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper);
} // namespace fastdeploy
#endif
typedef struct FD_C_RuntimeOptionWrapper {
std::unique_ptr<fastdeploy::RuntimeOption> runtime_option;
} FD_C_RuntimeOptionWrapper;
namespace fastdeploy {
std::unique_ptr<fastdeploy::RuntimeOption>&
FD_C_CheckAndConvertRuntimeOptionWrapper(
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper);
}
#define CHECK_AND_CONVERT_FD_TYPE(TYPENAME, variable_name) \
fastdeploy::FD_C_CheckAndConvert##TYPENAME(variable_name)

View File

@@ -0,0 +1,53 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy_capi/vision/classification/ppcls/model.h"
#include "fastdeploy_capi/types_internal.h"
extern "C" {
FD_C_PaddleClasModelWrapper* FD_C_CreatePaddleClasModelWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper =
new FD_C_PaddleClasModelWrapper();
fd_c_paddleclas_model_wrapper->paddleclas_model =
std::unique_ptr<fastdeploy::vision::classification::PaddleClasModel>(
new fastdeploy::vision::classification::PaddleClasModel(
std::string(model_file), std::string(params_file),
std::string(config_file), *runtime_option,
static_cast<fastdeploy::ModelFormat>(model_format)));
return fd_c_paddleclas_model_wrapper;
}
void FD_C_DestroyPaddleClasModelWrapper(
__fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) {
delete fd_c_paddleclas_model_wrapper;
}
FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
__fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper,
FD_C_Mat img, FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
auto& paddleclas_model = CHECK_AND_CONVERT_FD_TYPE(
PaddleClasModelWrapper, fd_c_paddleclas_model_wrapper);
auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
ClassifyResultWrapper, fd_c_classify_result_wrapper);
return paddleclas_model->Predict(im, classify_result.get());
}
}

View File

@@ -0,0 +1,66 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy_capi/fd_common.h"
#include "fastdeploy_capi/fd_type.h"
#include "fastdeploy_capi/runtime_option.h"
#include "fastdeploy_capi/vision/result.h"
typedef struct FD_C_PaddleClasModelWrapper FD_C_PaddleClasModelWrapper;
#ifdef __cplusplus
extern "C" {
#endif
/** \brief Create a new FD_C_PaddleClasModelWrapper object
*
* \param[in] model_file Path of model file, e.g resnet/model.pdmodel
* \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored
* \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml
* \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`
* \param[in] model_format Model format of the loaded model, default is Paddle format
*
* \return Return a pointer to FD_C_PaddleClasModelWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PaddleClasModelWrapper*
FD_C_CreatePaddleClasModelWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format);
/** \brief Destroy a FD_C_PaddleClasModelWrapper object
*
* \param[in] fd_c_paddleclas_model_wrapper pointer to FD_C_PaddleClasModelWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyPaddleClasModelWrapper(
__fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper);
/** \brief Predict the classification result for an input image
*
* \param[in] fd_c_paddleclas_model_wrapper pointer to FD_C_PaddleClasModelWrapper object
* \param[in] img pointer to cv::Mat image
 * \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object, which stores the result.
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
__fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper,
FD_C_Mat img, FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
#ifdef __cplusplus
} // extern "C"
#endif
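As a usage illustration (not part of the diff), the sketch below wires this classifier API together with the runtime-option and result APIs declared in the included headers. FD_C_Imread and FD_C_ModelFormat_PADDLE are assumed to be provided by fastdeploy_capi/fd_type.h; both names are assumptions, not confirmed by the files shown here.
// Minimal sketch: classify one image with the PaddleClas C API.
// FD_C_Imread and FD_C_ModelFormat_PADDLE are assumed helpers/values.
#include <stdio.h>
#include "fastdeploy_capi/vision/classification/ppcls/model.h"

void classify_image(const char* image_path) {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseCpu(option);

  FD_C_PaddleClasModelWrapper* model = FD_C_CreatePaddleClasModelWrapper(
      "resnet/model.pdmodel", "resnet/model.pdiparams", "resnet/infer_cfg.yml",
      option, FD_C_ModelFormat_PADDLE);

  FD_C_Mat img = FD_C_Imread(image_path);  // assumed image-loading helper
  FD_C_ClassifyResultWrapper* result = FD_C_CreateClassifyResultWrapper();

  if (FD_C_PaddleClasModelWrapperPredict(model, img, result)) {
    // Copy the result out of the wrapper into a plain C struct.
    FD_C_ClassifyResult* data = FD_C_ClassifyResultWrapperGetData(result);
    if (data->label_ids.size > 0) {
      printf("label: %d  score: %f\n", data->label_ids.data[0],
             data->scores.data[0]);
    }
    FD_C_DestroyClassifyResult(data);
  }

  FD_C_DestroyClassifyResultWrapper(result);
  FD_C_DestroyPaddleClasModelWrapper(model);
  FD_C_DestroyRuntimeOptionWrapper(option);
}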

View File

@@ -0,0 +1,53 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy_capi/vision/detection/ppdet/model.h"
#include "fastdeploy_capi/types_internal.h"
#include "fastdeploy_capi/vision/visualize.h"
extern "C" {
FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper = new FD_C_PPYOLOEWrapper();
fd_c_ppyoloe_wrapper->ppyoloe_model =
std::unique_ptr<fastdeploy::vision::detection::PPYOLOE>(
new fastdeploy::vision::detection::PPYOLOE(
std::string(model_file), std::string(params_file),
std::string(config_file), *runtime_option,
static_cast<fastdeploy::ModelFormat>(model_format)));
return fd_c_ppyoloe_wrapper;
}
void FD_C_DestroyPPYOLOEWrapper(
__fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper) {
delete fd_c_ppyoloe_wrapper;
}
FD_C_Bool FD_C_PPYOLOEWrapperPredict(
FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
auto& ppyoloe_model =
CHECK_AND_CONVERT_FD_TYPE(PPYOLOEWrapper, fd_c_ppyoloe_wrapper);
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
return ppyoloe_model->Predict(im, detection_result.get());
}
}

View File

@@ -0,0 +1,67 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy_capi/fd_common.h"
#include "fastdeploy_capi/fd_type.h"
#include "fastdeploy_capi/runtime_option.h"
#include "fastdeploy_capi/vision/result.h"
typedef struct FD_C_PPYOLOEWrapper FD_C_PPYOLOEWrapper;
typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper;
#ifdef __cplusplus
extern "C" {
#endif
/** \brief Create a new FD_C_PPYOLOEWrapper object
*
* \param[in] model_file Path of model file, e.g resnet/model.pdmodel
* \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored
* \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml
* \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`
* \param[in] model_format Model format of the loaded model, default is Paddle format
*
* \return Return a pointer to FD_C_PPYOLOEWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PPYOLOEWrapper*
FD_C_CreatesPPYOLOEWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format);
/** \brief Destroy a FD_C_PPYOLOEWrapper object
*
* \param[in] fd_c_ppyoloe_wrapper pointer to FD_C_PPYOLOEWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_DestroyPPYOLOEWrapper(__fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper);
/** \brief Predict the detection result for an input image
*
* \param[in] fd_c_ppyoloe_wrapper pointer to FD_C_PPYOLOEWrapper object
* \param[in] img pointer to cv::Mat image
* \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result.
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPYOLOEWrapperPredict(
__fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
#ifdef __cplusplus
} // extern "C"
#endif
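For completeness, here is a detection-side sketch analogous to the classification one, again not part of the diff. FD_C_Imread, FD_C_Imwrite and FD_C_ModelFormat_PADDLE are assumed to come from fastdeploy_capi/fd_type.h, and the 0.5 threshold, line size 2 and font size 0.5 are arbitrary illustration values.
// Minimal sketch: detect objects with the PPYOLOE C API and draw the boxes.
// FD_C_Imread / FD_C_Imwrite are assumed helpers.
#include "fastdeploy_capi/vision/detection/ppdet/model.h"
#include "fastdeploy_capi/vision/visualize.h"

void detect_image(const char* image_path) {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseGpu(option, 0);

  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
      "ppyoloe/model.pdmodel", "ppyoloe/model.pdiparams",
      "ppyoloe/infer_cfg.yml", option, FD_C_ModelFormat_PADDLE);

  FD_C_Mat img = FD_C_Imread(image_path);  // assumed helper
  FD_C_DetectionResultWrapper* result = FD_C_CreateDetectionResultWrapper();

  if (FD_C_PPYOLOEWrapperPredict(model, img, result)) {
    FD_C_DetectionResult* data = FD_C_DetectionResultWrapperGetData(result);
    // Draw boxes whose score is at least 0.5.
    FD_C_Mat vis = FD_C_VisDetection(img, data, 0.5f, 2, 0.5f);
    FD_C_Imwrite("vis_result.jpg", vis);  // assumed helper
    FD_C_DestroyDetectionResult(data);
  }

  FD_C_DestroyDetectionResultWrapper(result);
  FD_C_DestroyPPYOLOEWrapper(model);
  FD_C_DestroyRuntimeOptionWrapper(option);
}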

View File

@@ -0,0 +1,238 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy_capi/vision/result.h"
#include "fastdeploy/utils/utils.h"
#include "fastdeploy_capi/types_internal.h"
extern "C" {
// Classification Results
FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapper() {
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
new FD_C_ClassifyResultWrapper();
fd_c_classify_result_wrapper->classify_result =
std::unique_ptr<fastdeploy::vision::ClassifyResult>(
new fastdeploy::vision::ClassifyResult());
return fd_c_classify_result_wrapper;
}
void FD_C_DestroyClassifyResultWrapper(
__fd_take FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
delete fd_c_classify_result_wrapper;
}
void FD_C_DestroyClassifyResult(
__fd_take FD_C_ClassifyResult* fd_c_classify_result) {
if (fd_c_classify_result == nullptr) return;
// delete label_ids
delete[] fd_c_classify_result->label_ids.data;
// delete scores
delete[] fd_c_classify_result->scores.data;
delete fd_c_classify_result;
}
FD_C_ClassifyResult* FD_C_ClassifyResultWrapperGetData(
__fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
ClassifyResultWrapper, fd_c_classify_result_wrapper);
FD_C_ClassifyResult* fd_c_classify_result_data = new FD_C_ClassifyResult();
// copy label_ids
fd_c_classify_result_data->label_ids.size = classify_result->label_ids.size();
fd_c_classify_result_data->label_ids.data =
new int32_t[fd_c_classify_result_data->label_ids.size];
memcpy(fd_c_classify_result_data->label_ids.data,
classify_result->label_ids.data(),
sizeof(int32_t) * fd_c_classify_result_data->label_ids.size);
// copy scores
fd_c_classify_result_data->scores.size = classify_result->scores.size();
fd_c_classify_result_data->scores.data =
new float[fd_c_classify_result_data->scores.size];
memcpy(fd_c_classify_result_data->scores.data, classify_result->scores.data(),
sizeof(float) * fd_c_classify_result_data->scores.size);
fd_c_classify_result_data->type =
static_cast<FD_C_ResultType>(classify_result->type);
return fd_c_classify_result_data;
}
FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData(
__fd_keep FD_C_ClassifyResult* fd_c_classify_result) {
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
FD_C_CreateClassifyResultWrapper();
auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
ClassifyResultWrapper, fd_c_classify_result_wrapper);
// copy label_ids
classify_result->label_ids.resize(fd_c_classify_result->label_ids.size);
memcpy(classify_result->label_ids.data(),
fd_c_classify_result->label_ids.data,
sizeof(int32_t) * fd_c_classify_result->label_ids.size);
// copy scores
classify_result->scores.resize(fd_c_classify_result->scores.size);
  memcpy(classify_result->scores.data(), fd_c_classify_result->scores.data,
         sizeof(float) * fd_c_classify_result->scores.size);
classify_result->type =
static_cast<fastdeploy::vision::ResultType>(fd_c_classify_result->type);
return fd_c_classify_result_wrapper;
}
// Detection Results
FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper() {
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
new FD_C_DetectionResultWrapper();
fd_c_detection_result_wrapper->detection_result =
std::unique_ptr<fastdeploy::vision::DetectionResult>(
new fastdeploy::vision::DetectionResult());
return fd_c_detection_result_wrapper;
}
void FD_C_DestroyDetectionResultWrapper(
__fd_take FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
delete fd_c_detection_result_wrapper;
}
void FD_C_DestroyDetectionResult(
__fd_take FD_C_DetectionResult* fd_c_detection_result) {
if (fd_c_detection_result == nullptr) return;
// delete boxes
for (size_t i = 0; i < fd_c_detection_result->boxes.size; i++) {
delete[] fd_c_detection_result->boxes.data[i].data;
}
delete[] fd_c_detection_result->boxes.data;
// delete scores
delete[] fd_c_detection_result->scores.data;
// delete label_ids
delete[] fd_c_detection_result->label_ids.data;
// delete masks
  for (size_t i = 0; i < fd_c_detection_result->masks.size; i++) {
    delete[] fd_c_detection_result->masks.data[i].data.data;
    delete[] fd_c_detection_result->masks.data[i].shape.data;
  }
  delete[] fd_c_detection_result->masks.data;
  delete fd_c_detection_result;
}
FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
__fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
FD_C_DetectionResult* fd_c_detection_result = new FD_C_DetectionResult();
// copy boxes
const int boxes_coordinate_dim = 4;
fd_c_detection_result->boxes.size = detection_result->boxes.size();
fd_c_detection_result->boxes.data =
new FD_C_OneDimArrayFloat[fd_c_detection_result->boxes.size];
for (size_t i = 0; i < detection_result->boxes.size(); i++) {
fd_c_detection_result->boxes.data[i].size = boxes_coordinate_dim;
fd_c_detection_result->boxes.data[i].data = new float[boxes_coordinate_dim];
for (size_t j = 0; j < boxes_coordinate_dim; j++) {
fd_c_detection_result->boxes.data[i].data[j] =
detection_result->boxes[i][j];
}
}
// copy scores
fd_c_detection_result->scores.size = detection_result->scores.size();
fd_c_detection_result->scores.data =
new float[fd_c_detection_result->scores.size];
memcpy(fd_c_detection_result->scores.data, detection_result->scores.data(),
sizeof(float) * fd_c_detection_result->scores.size);
// copy label_ids
fd_c_detection_result->label_ids.size = detection_result->label_ids.size();
fd_c_detection_result->label_ids.data =
new int32_t[fd_c_detection_result->label_ids.size];
memcpy(fd_c_detection_result->label_ids.data,
detection_result->label_ids.data(),
sizeof(int32_t) * fd_c_detection_result->label_ids.size);
// copy masks
fd_c_detection_result->masks.size = detection_result->masks.size();
fd_c_detection_result->masks.data =
new FD_C_Mask[fd_c_detection_result->masks.size];
for (size_t i = 0; i < detection_result->masks.size(); i++) {
// copy data in mask
fd_c_detection_result->masks.data[i].data.size =
detection_result->masks[i].data.size();
fd_c_detection_result->masks.data[i].data.data =
new uint8_t[detection_result->masks[i].data.size()];
memcpy(fd_c_detection_result->masks.data[i].data.data,
detection_result->masks[i].data.data(),
sizeof(uint8_t) * detection_result->masks[i].data.size());
// copy shape in mask
fd_c_detection_result->masks.data[i].shape.size =
detection_result->masks[i].shape.size();
fd_c_detection_result->masks.data[i].shape.data =
new int64_t[detection_result->masks[i].shape.size()];
memcpy(fd_c_detection_result->masks.data[i].shape.data,
detection_result->masks[i].shape.data(),
sizeof(int64_t) * detection_result->masks[i].shape.size());
fd_c_detection_result->masks.data[i].type =
static_cast<FD_C_ResultType>(detection_result->masks[i].type);
}
fd_c_detection_result->contain_masks = detection_result->contain_masks;
fd_c_detection_result->type =
static_cast<FD_C_ResultType>(detection_result->type);
return fd_c_detection_result;
}
FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData(
__fd_keep FD_C_DetectionResult* fd_c_detection_result) {
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
FD_C_CreateDetectionResultWrapper();
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
// copy boxes
const int boxes_coordinate_dim = 4;
detection_result->boxes.resize(fd_c_detection_result->boxes.size);
for (size_t i = 0; i < fd_c_detection_result->boxes.size; i++) {
for (size_t j = 0; j < boxes_coordinate_dim; j++) {
detection_result->boxes[i][j] =
fd_c_detection_result->boxes.data[i].data[j];
}
}
// copy scores
detection_result->scores.resize(fd_c_detection_result->scores.size);
memcpy(detection_result->scores.data(), fd_c_detection_result->scores.data,
sizeof(float) * fd_c_detection_result->scores.size);
// copy label_ids
detection_result->label_ids.resize(fd_c_detection_result->label_ids.size);
memcpy(detection_result->label_ids.data(),
fd_c_detection_result->label_ids.data,
sizeof(int32_t) * fd_c_detection_result->label_ids.size);
// copy masks
detection_result->masks.resize(fd_c_detection_result->masks.size);
for (size_t i = 0; i < fd_c_detection_result->masks.size; i++) {
// copy data in mask
detection_result->masks[i].data.resize(
fd_c_detection_result->masks.data[i].data.size);
memcpy(detection_result->masks[i].data.data(),
fd_c_detection_result->masks.data[i].data.data,
sizeof(uint8_t) * fd_c_detection_result->masks.data[i].data.size);
// copy shape in mask
detection_result->masks[i].shape.resize(
fd_c_detection_result->masks.data[i].shape.size);
memcpy(detection_result->masks[i].shape.data(),
fd_c_detection_result->masks.data[i].shape.data,
sizeof(int64_t) * fd_c_detection_result->masks.data[i].shape.size);
detection_result->masks[i].type =
static_cast<fastdeploy::vision::ResultType>(
fd_c_detection_result->masks.data[i].type);
}
detection_result->contain_masks = fd_c_detection_result->contain_masks;
detection_result->type =
static_cast<fastdeploy::vision::ResultType>(fd_c_detection_result->type);
return fd_c_detection_result_wrapper;
}
}

View File

@@ -0,0 +1,161 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy_capi/fd_common.h"
#include "fastdeploy_capi/fd_type.h"
typedef struct FD_C_ClassifyResultWrapper FD_C_ClassifyResultWrapper;
typedef struct FD_C_DetectionResultWrapper FD_C_DetectionResultWrapper;
#ifdef __cplusplus
extern "C" {
#endif
FD_ENUM(FD_C_ResultType){
UNKNOWN_RESULT,
CLASSIFY,
DETECTION,
SEGMENTATION,
OCR,
MOT,
FACE_DETECTION,
FACE_ALIGNMENT,
FACE_RECOGNITION,
MATTING,
MASK,
KEYPOINT_DETECTION,
HEADPOSE,
};
typedef struct FD_C_ClassifyResult {
FD_C_OneDimArrayInt32 label_ids;
FD_C_OneDimArrayFloat scores;
FD_C_ResultType type;
} FD_C_ClassifyResult;
typedef struct FD_C_Mask {
FD_C_OneDimArrayUint8 data;
FD_C_OneDimArrayInt64 shape;
FD_C_ResultType type;
} FD_C_Mask;
typedef struct FD_C_OneDimMask {
size_t size;
FD_C_Mask* data;
} FD_C_OneDimMask; // std::vector<FD_C_Mask>
typedef struct FD_C_DetectionResult {
FD_C_TwoDimArrayFloat boxes;
FD_C_OneDimArrayFloat scores;
FD_C_OneDimArrayInt32 label_ids;
FD_C_OneDimMask masks;
FD_C_Bool contain_masks;
FD_C_ResultType type;
} FD_C_DetectionResult;
// Classification Results
/** \brief Create a new FD_C_ClassifyResultWrapper object
*
* \return Return a pointer to FD_C_ClassifyResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResultWrapper*
FD_C_CreateClassifyResultWrapper();
/** \brief Destroy a FD_C_ClassifyResultWrapper object
*
* \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyClassifyResultWrapper(
__fd_take FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
/** \brief Destroy a FD_C_ClassifyResult object
*
* \param[in] fd_c_classify_result pointer to FD_C_ClassifyResult object
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_DestroyClassifyResult(__fd_take FD_C_ClassifyResult* fd_c_classify_result);
/** \brief Get a FD_C_ClassifyResult object from FD_C_ClassifyResultWrapper object
*
* \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object
* \return Return a pointer to FD_C_ClassifyResult object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResult*
FD_C_ClassifyResultWrapperGetData(
__fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
/** \brief Create a new FD_C_ClassifyResultWrapper object from FD_C_ClassifyResult object
*
* \param[in] fd_c_classify_result pointer to FD_C_ClassifyResult object
* \return Return a pointer to FD_C_ClassifyResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResultWrapper*
FD_C_CreateClassifyResultWrapperFromData(
__fd_keep FD_C_ClassifyResult* fd_c_classify_result);
// Detection Results
/** \brief Create a new FD_C_DetectionResultWrapper object
*
* \return Return a pointer to FD_C_DetectionResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResultWrapper*
FD_C_CreateDetectionResultWrapper();
/** \brief Destroy a FD_C_DetectionResultWrapper object
*
* \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyDetectionResultWrapper(
__fd_take FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
/** \brief Destroy a FD_C_DetectionResult object
*
* \param[in] fd_c_detection_result pointer to FD_C_DetectionResult object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyDetectionResult(
__fd_take FD_C_DetectionResult* fd_c_detection_result);
/** \brief Get a FD_C_DetectionResult object from FD_C_DetectionResultWrapper object
*
* \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object
* \return Return a pointer to FD_C_DetectionResult object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResult*
FD_C_DetectionResultWrapperGetData(
__fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
/** \brief Create a new FD_C_DetectionResultWrapper object from FD_C_DetectionResult object
*
* \param[in] fd_c_detection_result pointer to FD_C_DetectionResult object
* \return Return a pointer to FD_C_DetectionResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResultWrapper*
FD_C_CreateDetectionResultWrapperFromData(
__fd_keep FD_C_DetectionResult* fd_c_detection_result);
#ifdef __cplusplus
} // extern "C"
#endif
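To make the ownership and layout of these plain C structs concrete, here is a short sketch (not part of the diff) that walks a FD_C_DetectionResult obtained from FD_C_DetectionResultWrapperGetData(). The xmin/ymin/xmax/ymax box layout is the usual FastDeploy convention and is stated here as an assumption.
// Minimal sketch: print the contents of a FD_C_DetectionResult.
#include <stdio.h>
#include "fastdeploy_capi/vision/result.h"

void print_detections(const FD_C_DetectionResult* res) {
  for (size_t i = 0; i < res->boxes.size; ++i) {
    // Each box is a 4-float array, assumed to be xmin, ymin, xmax, ymax.
    const float* box = res->boxes.data[i].data;
    printf("box [%.1f, %.1f, %.1f, %.1f]  score %.3f  label %d\n",
           box[0], box[1], box[2], box[3],
           res->scores.data[i], res->label_ids.data[i]);
  }
  if (res->contain_masks) {
    printf("%zu instance masks attached\n", res->masks.size);
  }
}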

View File

@@ -0,0 +1,35 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy_capi/vision/visualize.h"
#include "fastdeploy/vision/visualize/visualize.h"
#include "fastdeploy_capi/types_internal.h"
extern "C" {
FD_C_Mat FD_C_VisDetection(FD_C_Mat im,
FD_C_DetectionResult* fd_c_detection_result,
float score_threshold, int line_size,
float font_size) {
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
FD_C_CreateDetectionResultWrapperFromData(fd_c_detection_result);
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
cv::Mat result = fastdeploy::vision::Visualize::VisDetection(
*(reinterpret_cast<cv::Mat*>(im)), *detection_result, score_threshold,
line_size, font_size);
return new cv::Mat(result);
}
}

View File

@@ -0,0 +1,36 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy_capi/fd_common.h"
#include "fastdeploy_capi/fd_type.h"
#include "fastdeploy_capi/vision/result.h"
#ifdef __cplusplus
extern "C" {
#endif
/** \brief Visualize detection result on an image
 *
 * \param[in] im pointer to cv::Mat image to draw on
 * \param[in] fd_detection_result pointer to FD_C_DetectionResult object
 * \param[in] score_threshold only draw boxes whose score is above this threshold
 * \param[in] line_size line width of the drawn boxes
 * \param[in] font_size font size of the label text
 * \return Return a pointer to the visualized cv::Mat object
 */
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat
FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result,
float score_threshold, int line_size, float font_size);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -6,12 +6,12 @@ if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
if (NOT BUILD_FASTDEPLOY_PYTHON)
message(STATUS "Build FastDeploy Ascend C++ library on X86 platform.")
if(NOT PADDLELITE_URL)
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_x86_huawei_ascend_npu_0105.tgz")
set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.CPP.inference_lite_lib.ubuntu.x86.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz")
endif()
else ()
message(STATUS "Build FastDeploy Ascend Python library on X86 platform.")
if(NOT PADDLELITE_URL)
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_x86_huawei_ascend_npu_python_0105.tgz")
set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.Python.inference_lite_lib.ubuntu.x86.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz")
endif()
endif()
endif()
@@ -21,12 +21,12 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
if (NOT BUILD_FASTDEPLOY_PYTHON)
message(STATUS "Build FastDeploy Ascend C++ library on aarch64 platform.")
if(NOT PADDLELITE_URL)
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_0118.tgz")
set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.CPP.inference_lite_lib.ubuntu.armv8.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz")
endif()
else ()
message(STATUS "Build FastDeploy Ascend Python library on aarch64 platform.")
if(NOT PADDLELITE_URL)
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_python_0118.tgz")
set(PADDLELITE_URL "https://paddle-qa.bj.bcebos.com/Paddle-Lite/DevelopDailyBuild/FastDeploy.Python.inference_lite_lib.ubuntu.armv8.huawei_ascend_npu.CANN5.1.RC2.alpha001.tar.gz")
endif()
endif()
endif()

View File

@@ -83,7 +83,8 @@ elseif(ANDROID)
if(NOT ANDROID_TOOLCHAIN MATCHES "clang")
message(FATAL_ERROR "Currently, only support clang toolchain while cross compiling FastDeploy for Android with FastTokenizer, but found ${ANDROID_TOOLCHAIN}.")
endif()
set(FASTTOKENIZER_FILE "fast_tokenizer-android-${ANDROID_ABI}-${FASTTOKENIZER_VERSION}.tgz")
# set(FASTTOKENIZER_FILE "fast_tokenizer-android-${ANDROID_ABI}-${FASTTOKENIZER_VERSION}.tgz")
set(FASTTOKENIZER_FILE "fast_tokenizer-lite-android-${ANDROID_ABI}-${FASTTOKENIZER_VERSION}.tgz")
else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
set(FASTTOKENIZER_FILE "fast_tokenizer-linux-aarch64-${FASTTOKENIZER_VERSION}.tgz")

View File

@@ -117,4 +117,4 @@ endif()
add_library(external_paddle_lite STATIC IMPORTED GLOBAL)
set_property(TARGET external_paddle_lite PROPERTY IMPORTED_LOCATION ${PADDLELITE_LIB})
add_dependencies(external_paddle_lite ${PADDLELITE_PROJECT})
add_dependencies(external_paddle_lite ${PADDLELITE_PROJECT})

View File

@@ -13,6 +13,10 @@
# limitations under the License.
include(ExternalProject)
if(NOT ENABLE_TRT_BACKEND)
message(FATAL_ERROR "While ENABLE_POROS_BACKEND, requires ENABLE_TRT_BACKEND=ON, but now its OFF.")
endif()
set(POROS_PROJECT "extern_poros")
set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
set(POROS_SOURCE_DIR
@@ -48,9 +52,10 @@ else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
else()
message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
if(WITH_GPU)
set(POROS_FILE "poros_manylinux_torch1.12.1_cu116_trt8.4_gcc82-${POROS_VERSION}.tar.gz")
else()
message(FATAL_ERROR "Poros currently only provides precompiled packages for the GPU version.")
endif()
endif()
endif()
@@ -77,7 +82,7 @@ add_dependencies(external_poros ${POROS_PROJECT})
# Download libtorch.so with ABI=1
set(TORCH_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(TORCH_FILE "libtorch-cxx11-abi-shared-with-deps-1.12.1-cu116.zip")
set(TROCH_URL "${TORCH_URL_BASE}${TORCH_FILE}")
set(TORCH_URL "${TORCH_URL_BASE}${TORCH_FILE}")
message(STATUS "Use the default Torch lib from: ${TORCH_URL}")
download_and_decompress(${TORCH_URL} ${CMAKE_CURRENT_BINARY_DIR}/${TORCH_FILE} ${THIRD_PARTY_PATH}/install)
if(EXISTS ${THIRD_PARTY_PATH}/install/torch)

View File

@@ -39,10 +39,12 @@ function(fastdeploy_summary)
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
message(STATUS " ENABLE_BENCHMARK : ${ENABLE_BENCHMARK}")
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " WITH_ASCEND : ${WITH_ASCEND}")
message(STATUS " WITH_TIMVX : ${WITH_TIMVX}")
message(STATUS " WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}")
message(STATUS " WITH_CAPI : ${WITH_CAPI}")
if(ENABLE_ORT_BACKEND)
message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
endif()

View File

@@ -86,8 +86,9 @@ Release版本
| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.3.tgz) | clang++ 10.0.0编译产出|
| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.3.tgz) | clang++ 13.0.0编译产出 |
| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.3.tgz) | gcc 6.3编译产出 |
| Android armv7&v8 | [fastdeploy-android-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.3-shared.tgz) | NDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.3-shared.tgz) | 包含FastTokenizer、UIEText APINDK 25clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.3-shared.tgz) | CV APINDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.3-shared.tgz) | 包含 FastTokenizer、UIE 等 Text API 和 CV APINDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-only-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-only-1.0.3-shared.tgz) | 仅包含 FastTokenizer、UIE 等 Text APINDK 25 及 clang++ 编译产出, 不包含 OpenCV 等 CV API。 支持 arm64-v8a 及 armeabi-v7a |
## Java SDK安装
@@ -95,8 +96,8 @@ Release版本Java SDK 目前仅支持Android版本为1.0.3
| 平台 | 文件 | 说明 |
| :--- | :--- | :---- |
| Android Java SDK | [fastdeploy-android-sdk-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.3.aar) | NDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.3.aar) | 包含FastTokenizer、UIEText APINDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.3.aar) | CV APINDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.3.aar) | 包含 FastTokenizer、UIE 等 Text API 和 CV APINDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |
Develop版本Nightly build
@@ -108,7 +109,8 @@ Develop版本Nightly build
| Mac OSX x64 | [fastdeploy-osx-x86_64-0.0.0.tgz](https://bj.bcebos.com/fastdeploy/dev/cpp/fastdeploy-osx-x86_64-0.0.0.tgz) | - |
| Mac OSX arm64 | [fastdeploy-osx-arm64-0.0.0.tgz](https://fastdeploy.bj.bcebos.com/dev/cpp/fastdeploy-osx-arm64-0.0.0.tgz) | clang++ 13.0.0编译产出 |
| Linux aarch64 | - | - |
| Android armv7&v8 | [fastdeploy-android-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-0.0.0-shared.tgz) | NDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-with-text-0.0.0-shared.tgz) | 包含FastTokenizer、UIEText APINDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android Java SDK | [fastdeploy-android-sdk-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-0.0.0.aar) | NDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-with-text-0.0.0.aar) | 包含FastTokenizer、UIE等Text APINDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |
| Android armv7&v8 | [fastdeploy-android-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-0.0.0-shared.tgz) | CV APINDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-with-text-0.0.0-shared.tgz) | 包含 FastTokenizer、UIE 等 Text API 和 CV APINDK 25及clang++编译产出, 支持arm64-v8a及armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-only-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-with-text-only-0.0.0-shared.tgz) | 仅包含 FastTokenizer、UIE 等 Text APINDK 25及clang++编译产出,不包含 OpenCV 等 CV API。 支持arm64-v8a及armeabi-v7a |
| Android Java SDK | [fastdeploy-android-sdk-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-0.0.0.aar) | CV APINDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-with-text-0.0.0.aar) | 包含 FastTokenizer、UIE 等 Text API 和 CV APINDK 20 编译产出, minSdkVersion 15, targetSdkVersion 28 |

View File

@@ -118,5 +118,13 @@ FastDeploy现在已经集成FlyCV, 用户可以在支持的硬件平台上使用
## 六.昇腾部署Demo参考
- 华为昇腾NPU 上使用C++部署 PaddleClas 分类模型请参考:[PaddleClas 华为升腾NPU C++ 部署示例](../../../examples/vision/classification/paddleclas/cpp/README.md)
- 华为昇腾NPU 上使用Python部署 PaddleClas 分类模型请参考:[PaddleClas 华为升腾NPU Python 部署示例](../../../examples/vision/classification/paddleclas/python/README.md)
| 模型系列 | C++ 部署示例 | Python 部署示例 |
| :-----------| :-------- | :--------------- |
| PaddleClas | [昇腾NPU C++ 部署示例](../../../examples/vision/classification/paddleclas/cpp/README_CN.md) | [昇腾NPU Python 部署示例](../../../examples/vision/classification/paddleclas/python/README_CN.md) |
| PaddleDetection | [昇腾NPU C++ 部署示例](../../../examples/vision/detection/paddledetection/cpp/README_CN.md) | [昇腾NPU Python 部署示例](../../../examples/vision/detection/paddledetection/python/README_CN.md) |
| PaddleSeg | [昇腾NPU C++ 部署示例](../../../examples/vision/segmentation/paddleseg/cpp/README_CN.md) | [昇腾NPU Python 部署示例](../../../examples/vision/segmentation/paddleseg/python/README_CN.md) |
| PaddleOCR | [昇腾NPU C++ 部署示例](../../../examples/vision/ocr/PP-OCRv3/cpp/README_CN.md) | [昇腾NPU Python 部署示例](../../../examples/vision/ocr/PP-OCRv3/python/README_CN.md) |
| Yolov5 | [昇腾NPU C++ 部署示例](../../../examples/vision/detection/yolov5/cpp/README_CN.md) | [昇腾NPU Python 部署示例](../../../examples/vision/detection/yolov5/python/README_CN.md) |
| Yolov6 | [昇腾NPU C++ 部署示例](../../../examples/vision/detection/yolov6/cpp/README_CN.md) | [昇腾NPU Python 部署示例](../../../examples/vision/detection/yolov6/python/README_CN.md) |
| Yolov7 | [昇腾NPU C++ 部署示例](../../../examples/vision/detection/yolov7/cpp/README_CN.md) | [昇腾NPU Python 部署示例](../../../examples/vision/detection/yolov7/python/README_CN.md) |

View File

@@ -11,5 +11,5 @@ RKNPU2指的是Rockchip推出的RK356X以及RK3588系列芯片的NPU。
* [RKNPU2开发环境搭建](../faq/rknpu2/environment.md)
* [编译FastDeploy](../faq/rknpu2/build.md)
* [RKNN模型导出建议](../faq/rknpu2/export.md)
* [RKNPU2模型部署demo](../faq/rknpu2/rknpu2.md)
* [RKNPU2模型速度一览表](../faq/rknpu2/rknpu2.md)
* [RKNPU2 常见问题合集](../faq/rknpu2/issues.md)

View File

@@ -0,0 +1,4 @@
# 常见问题
1. Windows安装fastdeploy-python或fastdeploy-gpu-python后执行`import fastdeploy`时,出现提示"DLL Load failed: 找不到指定模块"
- **解决方式**:此问题的原因可能在于系统没有安装 Visual C++ 运行库,可在此页面根据个人环境下载并安装后重新 import 即可解决https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-170

View File

@@ -3,23 +3,22 @@
# FastDeploy集成新模型流程
在FastDeploy里面新增一个模型包括增加C++/Python的部署支持。 本文以torchvision v0.12.0中的ResNet50模型为例介绍使用FastDeploy做外部[模型集成](#modelsupport)具体包括如下3步。
在FastDeploy里面新增一个模型包括增加C++/Python的部署支持。 本文以YOLOv7Face模型为例介绍使用FastDeploy做外部[模型集成](#modelsupport)具体包括如下3步。
| 步骤 | 说明 | 创建或修改的文件 |
|:------:|:-------------------------------------:|:---------------------------------------------:|
| [1](#step2) | 在fastdeploy/vision相应任务模块增加模型实现 | resnet.h、resnet.cc、vision.h |
| [2](#step4) | 通过pybind完成Python接口绑定 | resnet_pybind.cc、classification_pybind.cc |
| [3](#step5) | 实现Python相应调用接口 | resnet.py、\_\_init\_\_.py |
| [1](#step2) | 在fastdeploy/vision相应任务模块增加模型实现 | yolov7face.h、yolov7face.cc、preprocessor.h、preprocessor.cc、postprocessor.h、postprocessor.cc、vision.h |
| [2](#step4) | 通过pybind完成Python接口绑定 | yolov7face_pybind.cc |
| [3](#step5) | 实现Python相应调用接口 | yolov7face.py、\_\_init\_\_.py |
在完成上述3步之后一个外部模型就集成好了。
<br />
如果您想为FastDeploy贡献代码还需要为新增模型添加测试代码、说明文档和代码注释可在[测试](#test)中查看。
## 模型集成 <span id="modelsupport"></span>
### 模型准备 <span id="step1"></span>
## 1、模型准备 <span id="step1"></span>
在集成外部模型之前,先要将训练好的模型(.pt.pdparams 等转换成FastDeploy支持部署的模型格式.onnx.pdmodel。多数开源仓库会提供模型转换脚本可以直接利用脚本做模型的转换。由于torchvision没有提供转换脚本因此手动编写转换脚本本文中将 `torchvison.models.resnet50` 转换为 `resnet50.onnx` 参考代码如下:
在集成外部模型之前,先要将训练好的模型(.pt、.pdparams 等转换成FastDeploy支持部署的模型格式.onnx、.pdmodel。多数开源仓库会提供模型转换脚本可以直接利用脚本做模型的转换例如yolov7face官方库提供的[export.py](https://github.com/derronqi/yolov7-face/blob/main/models/export.py)文件。若官方库未提供转换导出脚本,则需要手动编写转换脚本,例如 torchvision 没有提供转换脚本,下文中手动将 `torchvision.models.resnet50` 转换为 `resnet50.onnx`,参考代码如下:
```python
import torch
@@ -41,57 +40,139 @@ torch.onnx.export(model,
```
执行上述脚本将会得到 `resnet50.onnx` 文件。
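若需要一个可直接运行的完整参考,可以参考如下最小示例(假设环境中已安装 torch 与 torchvision输入尺寸、opset 版本及输入/输出名均为示例假设):
```python
# 参考示例假设已安装torch与torchvision输出文件名与上文保持一致为resnet50.onnx
import torch
import torchvision

# 加载预训练的ResNet50并切换到推理模式
model = torchvision.models.resnet50(pretrained=True)
model.eval()

# 构造一个 1x3x224x224 的示例输入224x224 为示例尺寸)
dummy_input = torch.randn(1, 3, 224, 224)

# 导出为ONNXopset版本与输入/输出名均为示例取值
torch.onnx.export(model,
                  dummy_input,
                  "resnet50.onnx",
                  opset_version=11,
                  input_names=["inputs"],
                  output_names=["outputs"])
```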
### C++部分 <span id="step2"></span>
* 创建`resnet.h`文件
## 2、CPP代码实现 <span id="step2"></span>
### 2.1、前处理类实现
* 创建`preprocessor.h`文件
* 创建位置
* FastDeploy/fastdeploy/vision/classification/contrib/resnet.h (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名.h)
* FastDeploy/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.h (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名/preprocessor.h)
* 创建内容
* 首先在resnet.h中创建 ResNet类并继承FastDeployModel父类之后声明`Predict``Initialize``Preprocess``Postprocess``构造函数`,以及必要的变量,具体的代码细节请参考[resnet.h](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-69128489e918f305c208476ba793d8167e77de2aa7cadf5dcbac30da448bd28e)。
* 首先在preprocessor.h中创建 Yolov7FacePreprocessor 类,之后声明`Run`、`Preprocess`、`LetterBox`和`构造函数`,以及必要的变量及其`set`和`get`方法,具体的代码细节请参考[preprocessor.h](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.h)。
```C++
class FASTDEPLOY_DECL ResNet : public FastDeployModel {
class FASTDEPLOY_DECL Yolov7FacePreprocessor {
public:
ResNet(...);
virtual bool Predict(...);
private:
bool Initialize();
Yolov7FacePreprocessor(...);
bool Run(...);
protected:
bool Preprocess(...);
bool Postprocess(...);
void LetterBox(...);
};
```
* 创建`resnet.cc`文件
* 创建`preprocessor.cc`文件
* 创建位置
* FastDeploy/fastdeploy/vision/classification/contrib/resnet.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名.cc)
* FastDeploy/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名/preprocessor.cc)
* 创建内容
* 在`resnet.cc`中实现`resnet.h`中声明函数的具体逻辑,其中`PreProcess` 和 `PostProcess`需要参考源官方库的前后处理逻辑复现,ResNet每个函数具体逻辑如下,具体的代码请参考[resnet.cc](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-d229d702de28345253a53f2a5839fd2c638f3d32fffa6a7d04d23db9da13a871)。
* 在`preprocessor.cc`中实现`preprocessor.h`中声明函数的具体逻辑,其中`Preprocess`需要参考官方库的前处理逻辑复现preprocessor每个函数具体逻辑如下具体的代码请参考[preprocessor.cc](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc)。
```C++
ResNet::ResNet(...) {
Yolov7FacePreprocessor::Yolov7FacePreprocessor(...) {
// 构造函数逻辑
// 全局变量赋值
}
bool Yolov7FacePreprocessor::Run() {
// 执行前处理
// 根据传入图片数量对每张图片进行处理通过循环的方式将每张图片传入Preprocess函数进行预处理,
// 即Preprocess为处理单元Run方法为每张图片调用处理单元处理
return true;
}
bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output,
std::map<std::string, std::array<float, 2>>* im_info) {
// 前处理逻辑
// 1. LetterBox 2. convert and permute 3. 处理结果存入 FDTensor类中
return true;
}
void Yolov7FacePreprocessor::LetterBox(FDMat* mat) {
  // LetterBox等比缩放并在边缘填充到目标尺寸
}
```
### 2.2、后处理类实现
* 创建`postprocessor.h`文件
* 创建位置
* FastDeploy/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.h (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名/postprocessor.h)
* 创建内容
* 首先在postprocessor.h中创建 Yolov7FacePostprocessor 类,之后声明`Run`和`构造函数`,以及必要的变量及其`set`和`get`方法,具体的代码细节请参考[postprocessor.h](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.h)。
```C++
class FASTDEPLOY_DECL Yolov7FacePostprocessor {
public:
Yolov7FacePostprocessor(...);
bool Run(...);
};
```
* 创建`postprocessor.cc`文件
* 创建位置
* FastDeploy/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名/postprocessor.cc)
* 创建内容
* 在`postprocessor.cc`中实现`postprocessor.h`中声明函数的具体逻辑,其中`Run`需要参考官方库的后处理逻辑复现postprocessor每个函数具体逻辑如下具体的代码请参考[postprocessor.cc](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.cc)。
```C++
Yolov7FacePostprocessor::Yolov7FacePostprocessor(...) {
// 构造函数逻辑
// 全局变量赋值
}
bool Yolov7FacePostprocessor::Run() {
// 后处理逻辑
// 1. Padding 2. Choose box by conf_threshold 3. NMS 4. 结果存入 FaceDetectionResult类
return true;
}
```
### 2.3、YOLOv7Face实现
* 创建`yolov7face.h`文件
* 创建位置
* FastDeploy/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face.h (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名/模型名.h)
* 创建内容
* 首先在yolov7face.h中创建 YOLOv7Face 类并继承FastDeployModel父类之后声明`Predict`、`BatchPredict`、`Initialize`和`构造函数`,以及必要的变量及其`get`方法,具体的代码细节请参考[yolov7face.h](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face.h)。
```C++
class FASTDEPLOY_DECL YOLOv7Face : public FastDeployModel {
public:
YOLOv7Face(...);
virtual bool Predict(...);
virtual bool BatchPredict(...);
protected:
bool Initialize();
Yolov7FacePreprocessor preprocessor_;
Yolov7FacePostprocessor postprocessor_;
};
```
* 创建`yolov7face.cc`文件
* 创建位置
* FastDeploy/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名/模型名.cc)
* 创建内容
* 在`yolov7face.cc`中实现`yolov7face.h`中声明函数的具体逻辑YOLOv7Face每个函数具体逻辑如下具体的代码请参考[yolov7face.cc](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face.cc)。
```C++
YOLOv7Face::YOLOv7Face(...) {
// 构造函数逻辑
// 1. 指定 Backend 2. 设置RuntimeOption 3. 调用Initialize()函数
}
bool ResNet::Initialize() {
bool YOLOv7Face::Initialize() {
// 初始化逻辑
// 1. 全局变量赋值 2. 调用InitRuntime()函数
return true;
}
bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
// 前处理逻辑
// 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. 处理结果存入 FDTensor类中
bool YOLOv7Face::Predict(const cv::Mat& im, FaceDetectionResult* result) {
std::vector<FaceDetectionResult> results;
if (!BatchPredict({im}, &results)) {
return false;
}
*result = std::move(results[0]);
return true;
}
bool ResNet::Postprocess(FDTensor& infer_result, ClassifyResult* result, int topk) {
//后处理逻辑
// 1. Softmax 2. Choose topk labels 3. 结果存入 ClassifyResult类
return true;
}
bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
// Predict是对单张图片进行预测通过将含有一张图片的数组送入BatchPredict实现
bool YOLOv7Face::BatchPredict(const std::vector<cv::Mat>& images, std::vector<FaceDetectionResult>* result) {
  preprocessor_.Run(...);
  Infer(...);
  postprocessor_.Run(...);
return true;
}
// BatchPredict为对批量图片进行预测接收一个含有若干张图片的动态数组vector
```
<span id="step3"></span>
* 在`vision.h`文件中加入新增模型文件
@@ -101,77 +182,116 @@ bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
```C++
#ifdef ENABLE_VISION
#include "fastdeploy/vision/classification/contrib/resnet.h"
#include "fastdeploy/vision/facedet/contrib/yolov7face.h"
#endif
```
## 3、Python接口封装
### Pybind部分 <span id="step4"></span>
### 3.1、Pybind部分 <span id="step4"></span>
* 创建Pybind文件
* 创建位置
* FastDeploy/fastdeploy/vision/classification/contrib/resnet_pybind.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名_pybind.cc)
* FastDeploy/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/外部模型/模型名/模型名_pybind.cc)
* 创建内容
* 利用Pybind将C++中的函数变量绑定到Python中具体代码请参考[resnet_pybind.cc](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-270af0d65720310e2cfbd5373c391b2110d65c0f4efa547f7b7eeffcb958bdec)。
* 利用Pybind将C++中的函数变量绑定到Python中具体代码请参考[yolov7face_pybind.cc](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc)。
```C++
void BindResNet(pybind11::module& m) {
pybind11::class_<vision::classification::ResNet, FastDeployModel>(
m, "ResNet")
void BindYOLOv7Face(pybind11::module& m) {
pybind11::class_<vision::facedet::YOLOv7Face, FastDeployModel>(
m, "YOLOv7Face")
.def(pybind11::init<std::string, std::string, RuntimeOption, ModelFormat>())
.def("predict", ...)
.def_readwrite("size", &vision::classification::ResNet::size)
.def_readwrite("mean_vals", &vision::classification::ResNet::mean_vals)
.def_readwrite("std_vals", &vision::classification::ResNet::std_vals);
.def("batch_predict", ...)
.def_property_readonly("preprocessor", ...)
.def_property_readonly("postprocessor", ...);
pybind11::class_<vision::facedet::Yolov7FacePreprocessor>(
m, "Yolov7FacePreprocessor")
.def(pybind11::init<>())
.def("run", ...)
.def_property("size", ...)
.def_property("padding_color_value", ...)
.def_property("is_scale_up", ...);
pybind11::class_<vision::facedet::Yolov7FacePostprocessor>(
m, "Yolov7FacePostprocessor")
.def(pybind11::init<>())
.def("run", ...)
.def_property("conf_threshold", ...)
.def_property("nms_threshold", ...);
}
```
* 调用Pybind函数
* 修改位置
* FastDeploy/fastdeploy/vision/classification/classification_pybind.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/任务名称}_pybind.cc)
* FastDeploy/fastdeploy/vision/facedet/facedet_pybind.cc (FastDeploy/C++代码存放位置/视觉模型/任务名称/任务名称_pybind.cc)
* 修改内容
```C++
void BindResNet(pybind11::module& m);
void BindClassification(pybind11::module& m) {
auto classification_module =
m.def_submodule("classification", "Image classification models.");
BindResNet(classification_module);
void BindYOLOv7Face(pybind11::module& m);
void BindFaceDet(pybind11::module& m) {
auto facedet_module =
m.def_submodule("facedet", "Face detection models.");
BindYOLOv7Face(facedet_module);
}
```
### Python部分 <span id="step5"></span>
* 创建`resnet.py`文件
### 3.2、Python部分 <span id="step5"></span>
* 创建`yolov7face.py`文件
* 创建位置
* FastDeploy/python/fastdeploy/vision/classification/contrib/resnet.py (FastDeploy/Python代码存放位置/fastdeploy/视觉模型/任务名称/外部模型/模型名.py)
* FastDeploy/python/fastdeploy/vision/facedet/contrib/yolov7face.py (FastDeploy/Python代码存放位置/fastdeploy/视觉模型/任务名称/外部模型/模型名.py)
* 创建内容
* 创建ResNet类继承自FastDeployModel实现 `\_\_init\_\_`、Pybind绑定的函数如`predict()`)、以及`对Pybind绑定的全局变量进行赋值和获取的函数`,具体代码请参考[resnet.py](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-a4dc5ec2d450e91f1c03819bf314c238b37ac678df56d7dea3aab7feac10a157)。
* 创建YOLOv7Face类继承自FastDeployModel并分别创建Yolov7FacePreprocessor、Yolov7FacePostprocessor的Python包装类实现 `\_\_init\_\_`、Pybind绑定的函数如`predict()`)、以及`对Pybind绑定的全局变量进行赋值和获取的函数`,具体代码请参考[yolov7face.py](https://github.com/PaddlePaddle/FastDeploy/tree/develop/python/fastdeploy/vision/facedet/contrib/yolov7face.py)。
```python
class ResNet(FastDeployModel):
class YOLOv7Face(FastDeployModel):
def __init__(self, ...):
self._model = C.vision.classification.ResNet(...)
def predict(self, input_image, topk=1):
return self._model.predict(input_image, topk)
self._model = C.vision.facedet.YOLOv7Face(...)
def predict(self, input_image):
return self._model.predict(input_image)
def batch_predict(self, images):
return self._model.batch_predict(images)
@property
def preprocessor(self):
return self._model.preprocessor
@property
def postprocessor(self):
return self._model.postprocessor
class Yolov7FacePreprocessor():
def __init__(self, ...):
self._preprocessor = C.vision.facedet.Yolov7FacePreprocessor(...)
def run(self, input_ims):
return self._preprocessor.run(input_ims)
@property
def size(self):
return self._model.size
@size.setter
def size(self, wh):
...
return self._preprocessor.size
@property
def padding_color_value(self):
return self._preprocessor.padding_color_value
...
class Yolov7FacePostprocessor():
def __init__(self, ...):
self._postprocessor = C.vision.facedet.Yolov7FacePostprocessor(...)
def run(self, ...):
return self._postprocessor.run(...)
@property
def conf_threshold(self):
return self._postprocessor.conf_threshold
@property
def nms_threshold(self):
return self._postprocessor.nms_threshold
...
```
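完成绑定与包装后,即可在 Python 侧通过上述属性调整前后处理参数,例如(以下数值仅为示例假设):
```python
# 假设 model 为上文创建的 YOLOv7Face 实例,数值仅为示例
model.preprocessor.size = [640, 640]       # 前处理的目标输入尺寸
model.postprocessor.conf_threshold = 0.3   # 置信度阈值
model.postprocessor.nms_threshold = 0.45   # NMS 阈值
```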
<span id="step6"></span>
* 导入ResNet
* 导入YOLOv7Face、Yolov7FacePreprocessor、Yolov7FacePostprocessor
* 修改位置
* FastDeploy/python/fastdeploy/vision/classification/\_\_init\_\_.py (FastDeploy/Python代码存放位置/fastdeploy/视觉模型/任务名称/\_\_init\_\_.py)
* FastDeploy/python/fastdeploy/vision/facedet/\_\_init\_\_.py (FastDeploy/Python代码存放位置/fastdeploy/视觉模型/任务名称/\_\_init\_\_.py)
* 修改内容
```Python
from .contrib.resnet import ResNet
from .contrib.yolov7face import *
```
## 测试 <span id="test"></span>
## 4、测试 <span id="test"></span>
### 编译
* C++
* 位置FastDeploy/
@@ -203,8 +323,8 @@ cd dist
pip install fastdeploy_gpu_python-版本号-cpxx-cpxxm-系统架构.whl
```
### 编写测试代码
* 创建位置: FastDeploy/examples/vision/classification/resnet/ (FastDeploy/示例目录/视觉模型/任务名称/模型名/)
## 5、示例代码开发
* 创建位置: FastDeploy/examples/vision/facedet/yolov7face/ (FastDeploy/示例目录/视觉模型/任务名称/模型名/)
* 创建目录结构
```
@@ -220,9 +340,9 @@ pip install fastdeploy_gpu_python-版本号-cpxx-cpxxm-系统架构.whl
```
* C++
* 编写CmakeLists文件、C++ 代码以及 README.md 内容请参考[cpp/](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-afcbe607b796509581f89e38b84190717f1eeda2df0419a2ac9034197ead5f96)。
* 编写CmakeLists文件、C++ 代码以及 README.md 内容请参考[cpp/](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/vision/facedet/yolov7face/cpp)。
* 编译 infer.cc
* 位置FastDeploy/examples/vision/classification/resnet/cpp/
* 位置FastDeploy/examples/vision/facedet/yolov7face/cpp/
```
mkdir build && cd build
@@ -231,38 +351,36 @@ make
```
* Python
* Python 代码以及 README.md 内容请参考[python/](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-5a0d6be8c603a8b81454ac14c17fb93555288d9adf92bbe40454449309700135)。
* Python 代码以及 README.md 内容请参考[python/](https://github.com/PaddlePaddle/FastDeploy/tree/develop/examples/vision/facedet/yolov7face/python)。
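下面给出一个此类 Python 示例脚本的最简参考(模型文件名、图片路径等均为示例假设,实际参数请以仓库中的示例代码为准):
```python
import cv2
import fastdeploy as fd

# 模型文件与测试图片路径仅为示例假设
model = fd.vision.facedet.YOLOv7Face("yolov7s-face.onnx")

# 读取图片并执行预测,结果为 FaceDetectionResult
im = cv2.imread("test_face.jpg")
result = model.predict(im)
print(result)
```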
### 为代码添加注释
为了方便用户理解代码,我们需要为新增代码添加注释,添加注释方法可参考如下示例。
- C++ 代码
您需要在resnet.h文件中为函数和变量增加注释有如下三种注释方式具体可参考[resnet.h](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-69128489e918f305c208476ba793d8167e77de2aa7cadf5dcbac30da448bd28e)。
您需要在yolov7face.h文件中为函数和变量增加注释有如下三种注释方式具体可参考[yolov7face.h](https://github.com/PaddlePaddle/FastDeploy/tree/develop/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face.h)。
```C++
/** \brief Predict for the input "im", the result will be saved in "result".
*
* \param[in] im Input image for inference.
* \param[in] result Saving the inference result.
* \param[in] topk The length of return values, e.g., if topk==2, the result will include the 2 most possible class label for input image.
*/
virtual bool Predict(cv::Mat* im, ClassifyResult* result, int topk = 1);
virtual bool Predict(const cv::Mat& im, FaceDetectionResult* result);
/// Tuple of (width, height)
std::vector<int> size;
/*! @brief Initialize for ResNet model, assign values to the global variables and call InitRuntime()
/*! @brief Initialize for YOLOv7Face model, assign values to the global variables and call InitRuntime()
*/
bool Initialize();
```
- Python 代码
你需要为resnet.py文件中的函数和变量增加适当的注释示例如下具体可参考[resnet.py](https://github.com/PaddlePaddle/FastDeploy/pull/347/files#diff-a4dc5ec2d450e91f1c03819bf314c238b37ac678df56d7dea3aab7feac10a157)。
你需要为yolov7face.py文件中的函数和变量增加适当的注释示例如下具体可参考[yolov7face.py](https://github.com/PaddlePaddle/FastDeploy/tree/develop/python/fastdeploy/vision/facedet/contrib/yolov7face.py)。
```python
def predict(self, input_image, topk=1):
"""Classify an input image
:param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
:param topk: (int)The topk result by the classify confidence score, default 1
:return: ClassifyResult
"""
return self._model.predict(input_image, topk)
def predict(self, input_image):
"""Detect the location and key points of human faces from an input image
:param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
:return: FaceDetectionResult
"""
return self._model.predict(input_image)
```
对于集成模型过程中的其他文件,您也可以对实现的细节添加适当的注释说明。

View File

@@ -1,3 +1,4 @@
[English](../../../en/faq/rknpu2/build.md) | 中文
# FastDeploy RKNPU2引擎编译
## FastDeploy后端支持详情

View File

@@ -1,3 +1,4 @@
[English](../../../en/faq/rknpu2/environment.md) | 中文
# FastDeploy RKNPU2推理环境搭建
## 简介

View File

@@ -1,3 +1,4 @@
[English](../../../en/faq/rknpu2/issues.md) | 中文
# RKNPU2常见问题合集
在使用FastDeploy的过程中大家可能会碰到很多的问题这个文档用来记录已经解决的共性问题方便大家查阅。

View File

@@ -13,18 +13,20 @@ ONNX模型不能直接调用RK芯片中的NPU进行运算需要把ONNX模型
* ARM CPU使用ONNX框架进行测试
* NPU均使用单核进行测试
| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) |
|----------------------|------------------------------------------------------------------------------------------|--------------------------|--------------------|
| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 |
| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 |
| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 |
| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 |
| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 |
| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 |
| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) |
|----------------------|--------------------------------------------------------------------------------------------------|--------------------------|--------------------|
| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 |
| Detection | [PaddleDetection Yolov8](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | yolov8-n | -/100 |
| Detection | [PPYOLOE](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | ppyoloe-s(int8) | -/77 |
| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 |
| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- |
| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 |
| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 |
| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 |
| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 |
| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 |
## 预编译库下载

View File

@@ -0,0 +1,107 @@
# fastdeploy_init.bat工具使用方式
<div id="CommandLineDeps"></div>
## 1 方式一:使用 fastdeploy_init.bat 进行配置(推荐)
<div id="CommandLineDeps1"></div>
对于版本高于0.2.1的SDK我们提供了 **fastdeploy_init.bat** 工具来管理FastDeploy中所有的依赖库。可以通过该脚本工具查看(show)、拷贝(install) 和 设置(init and setup) SDK中所有的dll方便用户快速完成运行时环境配置。
### 1.1 fastdeploy_init.bat 使用说明
<div id="CommandLineDeps11"></div>
首先进入SDK的根目录运行以下命令可以查看 fastdeploy_init.bat 的用法说明
```bat
D:\path-to-your-fastdeploy-sdk-dir>fastdeploy_init.bat help
------------------------------------------------------------------------------------------------------------------------------------------------------------
[1] [help] print help information: fastdeploy_init.bat help
[2] [show] show all dlls/libs/include paths: fastdeploy_init.bat show fastdeploy-sdk-dir
[3] [init] init all dlls paths for current terminal: fastdeploy_init.bat init fastdeploy-sdk-dir [WARNING: need copy onnxruntime.dll manually]
[4] [setup] setup path env for current terminal: fastdeploy_init.bat setup fastdeploy-sdk-dir [WARNING: need copy onnxruntime.dll manually]
[5] [install] install all dlls to a specific dir: fastdeploy_init.bat install fastdeploy-sdk-dir another-dir-to-install-dlls **[RECOMMEND]**
[6] [install] install all dlls with logging infos: fastdeploy_init.bat install fastdeploy-sdk-dir another-dir-to-install-dlls info
------------------------------------------------------------------------------------------------------------------------------------------------------------
```
用法简要说明如下:
- help: 打印所有的用法说明
- show: 查看SDK中所有的 dll、lib 和 include 路径
- init: 初始化所有dll路径信息后续用于设置terminal环境变量不推荐请参考4.3中关于onnxruntime的说明
- setup: 在init之后运行设置terminal环境变量不推荐请参考4.3中关于onnxruntime的说明
- install: 将SDK中所有的dll安装到某个指定的目录推荐
### 1.2 fastdeploy_init.bat 查看 SDK 中所有的 dll、lib 和 include 路径
<div id="CommandLineDeps12"></div>
进入SDK的根目录运行show命令可以查看SDK中所有的 dll、lib 和 include 路径。以下命令中 %cd% 表示当前目录SDK的根目录
```bat
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat show %cd%
------------------------------------------------------------------------------------------------------------------------------------------------------------
[SDK] D:\path-to-fastdeploy-sdk-dir
------------------------------------------------------------------------------------------------------------------------------------------------------------
[DLL] D:\path-to-fastdeploy-sdk-dir\lib\fastdeploy.dll **[NEEDED]**
[DLL] D:\path-to-fastdeploy-sdk-dir\third_libs\install\faster_tokenizer\lib\core_tokenizers.dll **[NEEDED]**
[DLL] D:\path-to-fastdeploy-sdk-dir\third_libs\install\opencv\build\x64\vc15\bin\opencv_ffmpeg3416_64.dll **[NEEDED]**
......
------------------------------------------------------------------------------------------------------------------------------------------------------------
[Lib] D:\path-to-fastdeploy-sdk-dir\lib\fastdeploy.lib **[NEEDED][fastdeploy]**
[Lib] D:\path-to-fastdeploy-sdk-dir\third_libs\install\faster_tokenizer\lib\core_tokenizers.lib **[NEEDED][fastdeploy::text]**
[Lib] D:\path-to-fastdeploy-sdk-dir\third_libs\install\opencv\build\x64\vc15\lib\opencv_world3416.lib **[NEEDED][fastdeploy::vision]**
......
------------------------------------------------------------------------------------------------------------------------------------------------------------
[Include] D:\path-to-fastdeploy-sdk-dir\include **[NEEDED][fastdeploy]**
[Include] D:\path-to-fastdeploy-sdk-dir\third_libs\install\faster_tokenizer\include **[NEEDED][fastdeploy::text]**
[Include] D:\path-to-fastdeploy-sdk-dir\third_libs\install\opencv\build\include **[NEEDED][fastdeploy::vision]**
......
------------------------------------------------------------------------------------------------------------------------------------------------------------
[XML] D:\path-to-fastdeploy-sdk-dir\third_libs\install\openvino\runtime\bin\plugins.xml **[NEEDED]**
------------------------------------------------------------------------------------------------------------------------------------------------------------
```
可以看到该命令会根据您当前的SDK输出对应的信息包含 dll、lib 和 include 的路径信息。对于 dll被标记为 `[NEEDED]`的是运行时所需要的如果包含OpenVINO后端还需要将他的plugins.xml拷贝到exe所在的目录对于 lib 和 include被标记为`[NEEDED]`的是开发时所需要配置的最小依赖。并且我们还增加了对应的API Tag标记如果您只使用vision API则只需要配置标记为 `[NEEDED][fastdeploy::vision]` 的 lib 和 include 路径.
### 1.3 fastdeploy_init.bat 安装 SDK 中所有的 dll 到指定的目录 (推荐)
<div id="CommandLineDeps13"></div>
进入SDK的根目录运行install命令可以将SDK 中所有的 dll 安装到指定的目录如exe所在的目录。我们推荐这种方式来配置exe运行所需要的依赖库。比如可以在SDK根目录下创建一个临时的bin目录备份所有的dll文件。以下命令中 %cd% 表示当前目录SDK的根目录
```bat
% info参数为可选参数添加info参数后会打印详细的安装信息 %
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat install %cd% bin
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat install %cd% bin info
```
```bat
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat install %cd% bin
[INFO] Do you want to install all FastDeploy dlls ?
[INFO] From: D:\path-to-fastdeploy-sdk-dir
[INFO] To: bin
Choose y means YES, n means NO: [y/n]y
YES.
请按任意键继续. . .
[INFO] Created bin done!
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
.....
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
.....
```
### 1.4 fastdeploy_init.bat 配置 SDK 环境变量
<div id="CommandLineDeps14"></div>
您也可以选择通过配置环境变量的方式来设置运行时的依赖库环境这种方式只在当前的terminal有效。如果您使用的SDK中包含了onnxruntime推理后端我们不推荐这种方式详细原因请参考4.3中关于onnxruntime配置的说明需要手动拷贝onnxruntime所有的dll到exe所在的目录。配置 SDK 环境变量的方式如下。以下命令中 %cd% 表示当前目录SDK的根目录
```bat
% 先运行 init 初始化当前SDK所有的dll文件路径 %
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat init %cd%
% 再运行 setup 完成 SDK 环境变量配置 %
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat setup %cd%
```
## 2 方式二修改CMakeLists.txt一行命令配置推荐
<div id="CommandLineDeps2"></div>
考虑到Windows下C++开发的特殊性如经常需要拷贝所有的lib或dll文件到某个指定的目录FastDeploy提供了`install_fastdeploy_libraries`的cmake函数方便用户快速配置所有的dll。修改ppyoloe的CMakeLists.txt添加
```cmake
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR}/Release)
```

View File

@@ -2,37 +2,9 @@
# 在 Windows 使用 FastDeploy C++ SDK
## 目录
- [1. 环境依赖](#Environment)
- [2. 下载 FastDeploy Windows 10 C++ SDK](#Download)
- [3. Windows下多种方式使用 C++ SDK 的方式](#CommandLine)
- [3.1 命令行方式使用 C++ SDK](#CommandLine)
- [3.1.1 在 Windows 命令行终端 上编译 example](#CommandLine)
- [3.1.2 运行可执行文件获得推理结果](#CommandLine)
- [3.2 Visual Studio 2019 创建sln工程使用 C++ SDK](#VisualStudio2019Sln)
- [3.2.1 Visual Studio 2019 创建 sln 工程项目](#VisualStudio2019Sln1)
- [3.2.2 从examples中拷贝infer_ppyoloe.cc的代码到工程](#VisualStudio2019Sln2)
- [3.2.3 将工程配置设置成"Release x64"配置](#VisualStudio2019Sln3)
- [3.2.4 配置头文件include路径](#VisualStudio2019Sln4)
- [3.2.5 配置lib路径和添加库文件](#VisualStudio2019Sln5)
- [3.2.6 编译工程并运行获取结果](#VisualStudio2019Sln6)
- [3.3 Visual Studio 2019 创建CMake工程使用 C++ SDK](#VisualStudio2019)
- [3.3.1 Visual Studio 2019 创建CMake工程项目](#VisualStudio20191)
- [3.3.2 在CMakeLists中配置 FastDeploy C++ SDK](#VisualStudio20192)
- [3.3.3 生成工程缓存并修改CMakeSetting.json配置](#VisualStudio20193)
- [3.3.4 生成可执行文件,运行获取结果](#VisualStudio20194)
- [4. 多种方法配置exe运行时所需的依赖库](#CommandLineDeps1)
- [4.1 使用 fastdeploy_init.bat 进行配置(推荐)](#CommandLineDeps1)
- [4.1.1 fastdeploy_init.bat 使用说明](#CommandLineDeps11)
- [4.1.2 fastdeploy_init.bat 查看 SDK 中所有的 dll、lib 和 include 路径](#CommandLineDeps12)
- [4.1.3 fastdeploy_init.bat 安装 SDK 中所有的 dll 到指定的目录](#CommandLineDeps13)
- [4.1.4 fastdeploy_init.bat 配置 SDK 环境变量](#CommandLineDeps14)
- [4.2 修改 CMakeLists.txt一行命令配置推荐](#CommandLineDeps2)
- [4.3 命令行设置环境变量](#CommandLineDeps3)
- [4.4 手动拷贝依赖库到exe的目录下](#CommandLineDeps4)
【**注意**】**编译只支持Release模式不支持Debug模式**
## 1. 环境依赖
## 1. 准备环境和Windows部署库
<div id="Environment"></div>
- cmake >= 3.12
@@ -40,468 +12,50 @@
- cuda >= 11.2 (当WITH_GPU=ON)
- cudnn >= 8.0 (当WITH_GPU=ON)
## 2. 下载 FastDeploy Windows 10 C++ SDK
<div id="Download"></div>
### 2.1 下载预编译库或者从源码编译最新的SDK
可以从以下链接下载编译好的 FastDeploy Windows 10 C++ SDKSDK中包含了examples代码。
```text
https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-0.2.1.zip
```
源码编译请参考: [build_and_install](../build_and_install)
### 2.2 准备模型文件和测试图片
可以从以下链接下载模型文件和测试图片,并解压缩
```text
https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz # (下载后解压缩)
https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
```
1. 根据需求选择下载对应的C++(CPU/GPU)部署库,下载文档见[安装文档说明](../build_and_install)
> 假定当前下载解压后的库路径在`D:\Download\fastdeploy-win-x64-gpu-x.x.x`
2. 下载如下模型文件和测试图片
> https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz # (下载后解压缩)
> https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
## 3. Windows下多种方式使用 C++ SDK 的方式
### 3.1 SDK使用方式一命令行方式使用 C++ SDK
<div id="CommandLine"></div>
## 2. 编译示例代码
本文档编译的示例代码可在解压的库中找到编译工具依赖VS 2019的安装**Windows打开x64 Native Tools Command Prompt for VS 2019命令工具**,通过如下命令开始编译
```shell
cd D:\Download\fastdeploy-win-x64-gpu-x.x.x\examples\vision\detection\paddledetection\cpp
#### 3.1.1 在 Windows 上编译 PPYOLOE
Windows菜单打开`x64 Native Tools Command Prompt for VS 2019`命令工具cd到ppyoloe的demo路径
```bat
cd fastdeploy-win-x64-gpu-0.2.1\examples\vision\detection\paddledetection\cpp
```
```bat
mkdir build && cd build
cmake .. -G "Visual Studio 16 2019" -A x64 -DFASTDEPLOY_INSTALL_DIR=%cd%\..\..\..\..\..\..\..\fastdeploy-win-x64-gpu-0.2.1 -DCUDA_DIRECTORY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2"
```
然后执行
```bat
cmake .. -G "Visual Studio 16 2019" -A x64 -DFASTDEPLOY_INSTALL_DIR=D:\Download\fastdeploy-win-x64-gpu-x.x.x -DCUDA_DIRECTORY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2"
msbuild infer_demo.sln /m:4 /p:Configuration=Release /p:Platform=x64
```
#### 3.1.2 运行 demo
```bat
如需使用Visual Studio 2019创建sln工程或者CMake工程等方式编译可参考如下文档
- [FastDeploy C++库在Windows上的多种使用方式](./use_sdk_on_windows_build.md)
## 3. 运行编译可执行程序
注意Windows上运行时需要将FastDeploy依赖的库拷贝至可执行程序所在目录或者配置环境变量。FastDeploy提供了工具帮助快速完成这一配置通过如下命令即可将所有依赖的dll文件拷贝至可执行程序所在目录
```shell
cd D:\Download\fastdeploy-win-x64-gpu-x.x.x
fastdeploy_init.bat install %cd% D:\Download\fastdeploy-win-x64-gpu-x.x.x\examples\vision\detection\paddledetection\cpp\build\Release
```
将dll拷贝到当前路径后准备好模型和图片使用如下命令运行可执行程序即可
```shell
cd Release
infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 0 # CPU
infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 1 # GPU
infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 2 # GPU + TensorRT
```
特别说明exe运行时所需要的依赖库配置方法请参考章节: [多种方法配置exe运行时所需的依赖库](#CommandLineDeps)
### 3.2 SDK使用方式二Visual Studio 2019 创建 sln 工程使用 C++ SDK
本章节针对非CMake用户介绍如何在Visual Studio 2019 中创建 sln 工程使用 FastDeploy C++ SDK. CMake用户请直接看下一章节。另外本章节内容特别感谢“梦醒南天”同学关于FastDeploy使用的文档教程[如何在 Windows 上使用 FastDeploy C++ 部署 PaddleDetection 目标检测模型](https://www.bilibili.com/read/cv18807232)
<div id="VisualStudio2019Sln"></div>
#### 3.2.1 步骤一Visual Studio 2019 创建 sln 工程项目
<div id="VisualStudio2019Sln1"></div>
1 打开Visual Studio 2019点击"创建新项目"->点击"控制台程序"从而创建新的sln工程项目.
![image](https://user-images.githubusercontent.com/31974251/192813386-cf9a93e0-ee42-42b3-b8bf-d03ae7171d4e.png)
![image](https://user-images.githubusercontent.com/31974251/192816516-a4965b9c-21c9-4a01-bbb2-c648a8256fc9.png)
2点击“创建”便创建了一个空的sln工程。我们直接从examples里面把infer_ppyoloe的代码拷贝到这里。
![image](https://user-images.githubusercontent.com/31974251/192817382-643c8ca2-1f2a-412e-954e-576c22b4ea62.png)
#### 3.2.2 步骤二从examples中拷贝infer_ppyoloe.cc的代码到工程
<div id="VisualStudio2019Sln2"></div>
1从examples中拷贝infer_ppyoloe.cc的代码到工程直接替换即可拷贝代码的路径为
```bat
fastdeploy-win-x64-gpu-0.2.1\examples\vision\detection\paddledetection\cpp
```
![image](https://user-images.githubusercontent.com/31974251/192818456-21ca846c-ab52-4001-96d2-77c8174bff6b.png)
在此步骤中使用到的`fastdeploy_init.bat`还提供更多其它功能,帮助开发者使用,包括:
- 查看SDK中所有dll, lib和include的路径
- 安装SDK中所有dll至指定目录
- 配置SDK环境变量
#### 3.2.3 步骤三:将工程配置设置成"Release x64"配置
<div id="VisualStudio2019Sln3"></div>
![image](https://user-images.githubusercontent.com/31974251/192818918-98d7a54c-4a60-4760-a3cb-ecacc38b7e7a.png)
#### 3.2.4 步骤四配置头文件include路径
<div id="VisualStudio2019Sln4"></div>
1配置头文件include路径鼠标选择项目然后单击右键即可弹出下拉菜单在其中单击“属性”。
![image](https://user-images.githubusercontent.com/31974251/192820573-23096aea-046c-4bb4-9929-c412718805cb.png)
2在弹出来的属性页中选择C/C++ —> 常规 —> 附加包含目录,然后添加 fastdeploy 和 opencv 的头文件路径。如:
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\include
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv-win-x64-3.4.16\build\include
```
注意如果是自行编译最新的SDK或版本>0.2.1依赖库目录结构有所变动opencv路径需要做出适当的修改。如
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv\build\include
```
![image](https://user-images.githubusercontent.com/31974251/192824445-978c06ed-cc14-4d6a-8ccf-d4594ca11533.png)
用户需要根据自己实际的sdk路径稍作修改。
#### 3.2.5 步骤五配置lib路径和添加库文件
<div id="VisualStudio2019Sln5"></div>
1属性页中选择链接器—>常规—> 附加库目录,然后添加 fastdeploy 和 opencv 的lib路径。如
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\lib
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv-win-x64-3.4.16\build\x64\vc15\lib
```
注意如果是自行编译最新的SDK或版本>0.2.1依赖库目录结构有所变动opencv路径需要做出适当的修改。如
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv\build\include
```
![image](https://user-images.githubusercontent.com/31974251/192826130-fe28791f-317c-4e66-a6a5-133e60b726f0.png)
2添加库文件只需要 fastdeploy.lib 和 opencv_world3416.lib
![image](https://user-images.githubusercontent.com/31974251/192826884-44fc84a1-c57a-45f1-8ee2-30b7eaa3dce9.png)
#### 3.2.6 步骤六:编译工程并运行获取结果
<div id="VisualStudio2019Sln6"></div>
1点击菜单栏“生成”->“生成解决方案”
![image](https://user-images.githubusercontent.com/31974251/192827608-beb53685-2f94-44dc-aa28-49b09a4ab864.png)
![image](https://user-images.githubusercontent.com/31974251/192827842-1f05d435-8a3e-492b-a3b7-d5e88f85f814.png)
编译成功可以看到exe保存在
```bat
D:\qiuyanjun\fastdeploy_test\infer_ppyoloe\x64\Release\infer_ppyoloe.exe
```
2执行可执行文件获得推理结果。 首先需要拷贝所有的dll到exe所在的目录下。同时也需要把ppyoloe的模型文件和测试图片下载解压缩后拷贝到exe所在的目录。 特别说明exe运行时所需要的依赖库配置方法请参考章节: [多种方法配置exe运行时所需的依赖库](#CommandLineDeps)
![image](https://user-images.githubusercontent.com/31974251/192829545-3ea36bfc-9a54-492b-984b-2d5d39094d47.png)
### 3.3 SDK使用方式三Visual Studio 2019 创建 CMake 工程使用 C++ SDK
<div id="VisualStudio2019"></div>
本章节针对CMake用户介绍如何在Visual Studio 2019 中创建 CMake 工程使用 FastDeploy C++ SDK.
#### 3.3.1 步骤一Visual Studio 2019 创建“CMake”工程项目
<div id="VisualStudio20191"></div>
1打开Visual Studio 2019点击"创建新项目"->点击"CMake"从而创建CMake工程项目。以PPYOLOE为例来说明如何在Visual Studio 2019 IDE中使用FastDeploy C++ SDK.
![image](https://user-images.githubusercontent.com/31974251/192143543-9f29e4cb-2307-45ca-a61a-bcfba5df19ff.png)
![image](https://user-images.githubusercontent.com/31974251/192143640-39e79c65-8b50-4254-8da6-baa21bb23e3c.png)
![image](https://user-images.githubusercontent.com/31974251/192143713-be2e6490-4cab-4151-8463-8c367dbc451a.png)
2打开工程发现Visual Studio 2019已经为我们生成了一些基本的文件其中包括CMakeLists.txt。infer_ppyoloe.h头文件这里实际上用不到我们可以直接删除。
![image](https://user-images.githubusercontent.com/31974251/192143930-db1655c2-66ee-448c-82cb-0103ca1ca2a0.png)
#### 3.3.2 步骤二在CMakeLists中配置 FastDeploy C++ SDK
<div id="VisualStudio20192"></div>
1在工程创建完成后我们需要添加infer_ppyoloe推理源码并修改CMakeLists.txt修改如下
![image](https://user-images.githubusercontent.com/31974251/192144782-79bccf8f-65d0-4f22-9f41-81751c530319.png)
2其中infer_ppyoloe.cpp的代码可以直接从examples中的代码拷贝过来
- [examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc](../../../examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc)
3CMakeLists.txt主要包括配置FastDeploy C++ SDK的路径如果是GPU版本的SDK还需要配置CUDA_DIRECTORY为CUDA的安装路径CMakeLists.txt的配置如下
```cmake
project(infer_ppyoloe_demo C CXX)
cmake_minimum_required(VERSION 3.12)
# Only support "Release" mode now
set(CMAKE_BUILD_TYPE "Release")
# Set FastDeploy install dir
set(FASTDEPLOY_INSTALL_DIR "D:/qiuyanjun/fastdeploy-win-x64-gpu-0.2.1"
CACHE PATH "Path to downloaded or built fastdeploy sdk.")
# Set CUDA_DIRECTORY (CUDA 11.x) for GPU SDK
set(CUDA_DIRECTORY "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7"
CACHE PATH "Path to installed CUDA Toolkit.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe.cpp)
target_link_libraries(infer_ppyoloe_demo ${FASTDEPLOY_LIBS})
# Optional: install all DLLs to binary dir.
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR}/Release)
```
注意,`install_fastdeploy_libraries`函数仅在最新的代码编译的SDK或版本>0.2.1下有效。
#### 3.3.3 步骤三生成工程缓存并修改CMakeSetting.json配置
<div id="VisualStudio20193"></div>
1点击"CMakeLists.txt"->右键点击"生成缓存":
![image](https://user-images.githubusercontent.com/31974251/192145349-c78b110a-0e41-4ee5-8942-3bf70bd94a75.png)
发现已经成功生成缓存了但是由于打开工程时默认是Debug模式我们发现exe和缓存保存路径还是Debug模式下的。 我们可以先修改CMake的设置为Release.
2点击"CMakeLists.txt"->右键点击"infer_ppyoloe_demo的cmake设置"进入CMakeSettings.json的设置面板把其中的Debug设置修改为Release.
![image](https://user-images.githubusercontent.com/31974251/192145242-01d37b44-e2fa-47df-82c1-c11c2ccbff99.png)
同时设置CMake生成器为 "Visual Studio 16 2019 Win64"
![image](https://user-images.githubusercontent.com/31974251/192147961-ac46d0f6-7349-4126-a123-914af2b63d95.jpg)
3点击保存CMake缓存以切换为Release配置
![image](https://user-images.githubusercontent.com/31974251/192145974-b5a63341-9143-49a2-8bfe-94ac641b1670.png)
44.1)点击"CMakeLists.txt"->右键"CMake缓存仅限x64-Release"->"点击删除缓存"4.2)点击"CMakeLists.txt"->"生成缓存"4.3)如果在步骤一发现删除缓存的选项是灰色的可以直接点击"CMakeLists.txt"->"生成"若生成失败则可以重复尝试4.1)和(4.2)。
![image](https://user-images.githubusercontent.com/31974251/192146394-51fbf2b8-1cba-41ca-bb45-5f26890f64ce.jpg)
最终可以看到配置已经成功生成Release模式下的CMake缓存了。
![image](https://user-images.githubusercontent.com/31974251/192146239-a1eacd9e-034d-4373-a262-65b18ce25b87.png)
#### 3.3.4 步骤四:生成可执行文件,运行获取结果。
<div id="VisualStudio20194"></div>
1点击"CMakeLists.txt"->"生成"。可以发现已经成功生成了infer_ppyoloe_demo.exe并保存在`out/build/x64-Release/Release`目录下。
![image](https://user-images.githubusercontent.com/31974251/192146852-c64d2252-8c8f-4309-a950-908a5cb258b8.png)
2执行可执行文件获得推理结果。 首先需要拷贝所有的dll到exe所在的目录下这里我们可以在CMakeLists.txt添加如下命令可将FastDeploy中所有的dll安装到指定的目录。注意该方式仅在最新的代码编译的SDK或版本>0.2.1下有效。其他配置方式,请参考章节: [多种方法配置exe运行时所需的依赖库](#CommandLineDeps)
```cmake
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR}/Release)
```
3同时也需要把ppyoloe的模型文件和测试图片下载解压缩后拷贝到exe所在的目录。 准备完成后,目录结构如下:
![image](https://user-images.githubusercontent.com/31974251/192147505-054edb77-564b-405e-89ee-fd0d2e413e78.png)
4最后执行以下命令获得推理结果
```bat
D:\xxxinfer_ppyoloe\out\build\x64-Release\Release>infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 0
[INFO] fastdeploy/runtime.cc(304)::fastdeploy::Runtime::Init Runtime initialized with Backend::OPENVINO in Device::CPU.
DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
415.047180,89.311569, 506.009613, 283.863098, 0.950423, 0
163.665710,81.914932, 198.585342, 166.760895, 0.896433, 0
581.788635,113.027618, 612.623474, 198.521713, 0.842596, 0
267.217224,89.777306, 298.796051, 169.361526, 0.837951, 0
......
153.301407,123.233757, 177.130539, 164.558350, 0.066697, 60
505.887604,140.919601, 523.167236, 151.875336, 0.084912, 67
Visualized result saved in ./vis_result.jpg
```
打开保存的图片查看可视化结果:
<div align="center">
<img src="https://user-images.githubusercontent.com/19339784/184326520-7075e907-10ed-4fad-93f8-52d0e35d4964.jpg", width=480px, height=320px />
</div>
特别说明exe运行时所需要的依赖库配置方法请参考章节: [多种方法配置exe运行时所需的依赖库](#CommandLineDeps)
## 4. 多种方法配置exe运行时所需的依赖库
<div id="CommandLineDeps"></div>
说明对于使用的最新源码编译的SDK或SDK版本>0.2.1的用户我们推荐使用4.14.2中的方式配置运行时的依赖库。如果使用的SDK版本<=0.2.1请参考4.34.4)中的方式进行配置。
### 4.1 方式一:使用 fastdeploy_init.bat 进行配置(推荐)
<div id="CommandLineDeps1"></div>
对于版本高于0.2.1的SDK我们提供了 **fastdeploy_init.bat** 工具来管理FastDeploy中所有的依赖库。可以通过该脚本工具查看(show)、拷贝(install) 和 设置(init and setup) SDK中所有的dll方便用户快速完成运行时环境配置。
#### 4.1.1 fastdeploy_init.bat 使用说明
<div id="CommandLineDeps11"></div>
首先进入SDK的根目录运行以下命令可以查看 fastdeploy_init.bat 的用法说明
```bat
D:\path-to-your-fastdeploy-sdk-dir>fastdeploy_init.bat help
------------------------------------------------------------------------------------------------------------------------------------------------------------
[1] [help] print help information: fastdeploy_init.bat help
[2] [show] show all dlls/libs/include paths: fastdeploy_init.bat show fastdeploy-sdk-dir
[3] [init] init all dlls paths for current terminal: fastdeploy_init.bat init fastdeploy-sdk-dir [WARNING: need copy onnxruntime.dll manually]
[4] [setup] setup path env for current terminal: fastdeploy_init.bat setup fastdeploy-sdk-dir [WARNING: need copy onnxruntime.dll manually]
[5] [install] install all dlls to a specific dir: fastdeploy_init.bat install fastdeploy-sdk-dir another-dir-to-install-dlls **[RECOMMEND]**
[6] [install] install all dlls with logging infos: fastdeploy_init.bat install fastdeploy-sdk-dir another-dir-to-install-dlls info
------------------------------------------------------------------------------------------------------------------------------------------------------------
```
用法简要说明如下:
- help: 打印所有的用法说明
- show: 查看SDK中所有的 dll、lib 和 include 路径
- init: 初始化所有dll路径信息后续用于设置terminal环境变量不推荐请参考4.3中关于onnxruntime的说明
- setup: 在init之后运行设置terminal环境变量不推荐请参考4.3中关于onnxruntime的说明
- install: 将SDK中所有的dll安装到某个指定的目录推荐
#### 4.1.2 fastdeploy_init.bat 查看 SDK 中所有的 dll、lib 和 include 路径
<div id="CommandLineDeps12"></div>
进入SDK的根目录运行show命令可以查看SDK中所有的 dll、lib 和 include 路径。以下命令中 %cd% 表示当前目录SDK的根目录
```bat
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat show %cd%
------------------------------------------------------------------------------------------------------------------------------------------------------------
[SDK] D:\path-to-fastdeploy-sdk-dir
------------------------------------------------------------------------------------------------------------------------------------------------------------
[DLL] D:\path-to-fastdeploy-sdk-dir\lib\fastdeploy.dll **[NEEDED]**
[DLL] D:\path-to-fastdeploy-sdk-dir\third_libs\install\faster_tokenizer\lib\core_tokenizers.dll **[NEEDED]**
[DLL] D:\path-to-fastdeploy-sdk-dir\third_libs\install\opencv\build\x64\vc15\bin\opencv_ffmpeg3416_64.dll **[NEEDED]**
......
------------------------------------------------------------------------------------------------------------------------------------------------------------
[Lib] D:\path-to-fastdeploy-sdk-dir\lib\fastdeploy.lib **[NEEDED][fastdeploy]**
[Lib] D:\path-to-fastdeploy-sdk-dir\third_libs\install\faster_tokenizer\lib\core_tokenizers.lib **[NEEDED][fastdeploy::text]**
[Lib] D:\path-to-fastdeploy-sdk-dir\third_libs\install\opencv\build\x64\vc15\lib\opencv_world3416.lib **[NEEDED][fastdeploy::vision]**
......
------------------------------------------------------------------------------------------------------------------------------------------------------------
[Include] D:\path-to-fastdeploy-sdk-dir\include **[NEEDED][fastdeploy]**
[Include] D:\path-to-fastdeploy-sdk-dir\third_libs\install\faster_tokenizer\include **[NEEDED][fastdeploy::text]**
[Include] D:\path-to-fastdeploy-sdk-dir\third_libs\install\opencv\build\include **[NEEDED][fastdeploy::vision]**
......
------------------------------------------------------------------------------------------------------------------------------------------------------------
[XML] D:\path-to-fastdeploy-sdk-dir\third_libs\install\openvino\runtime\bin\plugins.xml **[NEEDED]**
------------------------------------------------------------------------------------------------------------------------------------------------------------
```
可以看到该命令会根据您当前的SDK输出对应的信息包含 dll、lib 和 include 的路径信息。对于 dll被标记为 `[NEEDED]`的是运行时所需要的如果包含OpenVINO后端还需要将他的plugins.xml拷贝到exe所在的目录对于 lib 和 include被标记为`[NEEDED]`的是开发时所需要配置的最小依赖。并且我们还增加了对应的API Tag标记如果您只使用vision API则只需要配置标记为 `[NEEDED][fastdeploy::vision]` 的 lib 和 include 路径.
#### 4.1.3 fastdeploy_init.bat 安装 SDK 中所有的 dll 到指定的目录 (推荐)
<div id="CommandLineDeps13"></div>
进入SDK的根目录运行install命令可以将SDK 中所有的 dll 安装到指定的目录如exe所在的目录。我们推荐这种方式来配置exe运行所需要的依赖库。比如可以在SDK根目录下创建一个临时的bin目录备份所有的dll文件。以下命令中 %cd% 表示当前目录SDK的根目录
```bat
% info参数为可选参数添加info参数后会打印详细的安装信息 %
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat install %cd% bin
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat install %cd% bin info
```
```bat
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat install %cd% bin
[INFO] Do you want to install all FastDeploy dlls ?
[INFO] From: D:\path-to-fastdeploy-sdk-dir
[INFO] To: bin
Choose y means YES, n means NO: [y/n]y
YES.
请按任意键继续. . .
[INFO] Created bin done!
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
.....
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
已复制 1 个文件。
.....
```
#### 4.1.4 fastdeploy_init.bat 配置 SDK 环境变量
<div id="CommandLineDeps14"></div>
您也可以选择通过配置环境变量的方式来设置运行时的依赖库环境这种方式只在当前的terminal有效。如果您使用的SDK中包含了onnxruntime推理后端我们不推荐这种方式详细原因请参考4.3中关于onnxruntime配置的说明需要手动拷贝onnxruntime所有的dll到exe所在的目录。配置 SDK 环境变量的方式如下。以下命令中 %cd% 表示当前目录SDK的根目录
```bat
% 先运行 init 初始化当前SDK所有的dll文件路径 %
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat init %cd%
% 再运行 setup 完成 SDK 环境变量配置 %
D:\path-to-fastdeploy-sdk-dir>fastdeploy_init.bat setup %cd%
```
### 4.2 方式二修改CMakeLists.txt一行命令配置推荐
<div id="CommandLineDeps2"></div>
考虑到Windows下C++开发的特殊性如经常需要拷贝所有的lib或dll文件到某个指定的目录FastDeploy提供了`install_fastdeploy_libraries`的cmake函数方便用户快速配置所有的dll。修改ppyoloe的CMakeLists.txt添加
```cmake
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR}/Release)
```
注意该方式仅在最新的代码编译的SDK或版本>0.2.1下有效。
### 4.3 方式三:命令行设置环境变量
<div id="CommandLineDeps3"></div>
编译好的exe保存在Release目录下在运行demo前需要将模型和测试图片拷贝至该目录。另外需要在终端指定DLL的搜索路径。请在build目录下执行以下命令。
```bat
set FASTDEPLOY_HOME=%cd%\..\..\..\..\..\..\..\fastdeploy-win-x64-gpu-0.2.1
set PATH=%FASTDEPLOY_HOME%\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\onnxruntime\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\opencv-win-x64-3.4.16\build\x64\vc15\bin;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle_inference\paddle\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle_inference\third_party\install\mkldnn\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle_inference\third_party\install\mklml\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle2onnx\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\tensorrt\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\faster_tokenizer\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\faster_tokenizer\third_party\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\yaml-cpp\lib;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\openvino\bin;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\openvino\3rdparty\tbb\bin;%PATH%
```
注意需要拷贝onnxruntime.dll到exe所在的目录。
```bat
copy /Y %FASTDEPLOY_HOME%\third_libs\install\onnxruntime\lib\onnxruntime* Release\
```
由于较新的Windows在System32系统目录下自带了onnxruntime.dll因此就算设置了PATH系统依然会出现onnxruntime的加载冲突。因此需要先拷贝demo用到的onnxruntime.dll到exe所在的目录。如下
```bat
where onnxruntime.dll
C:\Windows\System32\onnxruntime.dll # windows自带的onnxruntime.dll
```
另外注意如果是自行编译最新的SDK或版本>0.2.1opencv和openvino目录结构有所改变路径需要做出适当的修改。如
```bat
set PATH=%FASTDEPLOY_HOME%\third_libs\install\opencv\build\x64\vc15\bin;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\openvino\runtime\bin;%PATH%
set PATH=%FASTDEPLOY_HOME%\third_libs\install\openvino\runtime\3rdparty\tbb\bin;%PATH%
```
可以把上述命令拷贝并保存到build目录下的某个bat脚本文件中(包含copy onnxruntime),如`setup_fastdeploy_dll.bat`,方便多次使用。
```bat
setup_fastdeploy_dll.bat
```
### 4.4 方式四手动拷贝依赖库到exe的目录下
<div id="CommandLineDeps4"></div>
手动拷贝或者在build目录下执行以下命令
```bat
set FASTDEPLOY_HOME=%cd%\..\..\..\..\..\..\..\fastdeploy-win-x64-gpu-0.2.1
copy /Y %FASTDEPLOY_HOME%\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\onnxruntime\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\opencv-win-x64-3.4.16\build\x64\vc15\bin\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\paddle_inference\paddle\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\paddle_inference\third_party\install\mkldnn\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\paddle_inference\third_party\install\mklml\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\paddle2onnx\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\tensorrt\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\faster_tokenizer\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\faster_tokenizer\third_party\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\yaml-cpp\lib\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\openvino\bin\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\openvino\bin\*.xml Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\openvino\3rdparty\tbb\bin\*.dll Release\
```
另外注意如果是自行编译最新的SDK或版本>0.2.1opencv和openvino目录结构有所改变路径需要做出适当的修改。如
```bat
copy /Y %FASTDEPLOY_HOME%\third_libs\install\opencv\build\x64\vc15\bin\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\openvino\runtime\bin\*.dll Release\
copy /Y %FASTDEPLOY_HOME%\third_libs\install\openvino\runtime\3rdparty\tbb\bin\*.dll Release\
```
可以把上述命令拷贝并保存到build目录下的某个bat脚本文件中如`copy_fastdeploy_dll.bat`,方便多次使用。
```bat
copy_fastdeploy_dll.bat
```
特别说明上述的set和copy命令对应的依赖库路径需要用户根据自己使用SDK中的依赖库进行适当地修改。比如若是CPU版本的SDK则不需要TensorRT相关的设置。
具体可参考如下文档
- [fastdeploy_init.bat工具的使用](./usage_of_fastdeploy_init_bat.md)

View File

@@ -0,0 +1,312 @@
# Multiple Ways to Use the FastDeploy C++ Library on Windows
## Table of Contents
- [1. Environment requirements](#Environment)
- [2. Download the FastDeploy Windows 10 C++ SDK](#Download)
- [3. Ways to use the C++ SDK on Windows](#CommandLine)
- [3.1 Use the C++ SDK from the command line](#CommandLine)
- [3.1.1 Build the example in a Windows command-line terminal](#CommandLine)
- [3.1.2 Run the executable to get inference results](#CommandLine)
- [3.2 Use the C++ SDK from a Visual Studio 2019 sln project](#VisualStudio2019Sln)
- [3.2.1 Create an sln project in Visual Studio 2019](#VisualStudio2019Sln1)
- [3.2.2 Copy the code of infer_ppyoloe.cc from examples into the project](#VisualStudio2019Sln2)
- [3.2.3 Switch the project configuration to "Release x64"](#VisualStudio2019Sln3)
- [3.2.4 Configure the header include paths](#VisualStudio2019Sln4)
- [3.2.5 Configure the lib paths and add library files](#VisualStudio2019Sln5)
- [3.2.6 Build the project and run it to get results](#VisualStudio2019Sln6)
- [3.3 Use the C++ SDK from a Visual Studio 2019 CMake project](#VisualStudio2019)
- [3.3.1 Create a CMake project in Visual Studio 2019](#VisualStudio20191)
- [3.3.2 Configure the FastDeploy C++ SDK in CMakeLists](#VisualStudio20192)
- [3.3.3 Generate the project cache and modify the CMakeSettings.json configuration](#VisualStudio20193)
- [3.3.4 Build the executable and run it to get results](#VisualStudio20194)
## 1. Environment requirements
<div id="Environment"></div>
- cmake >= 3.12
- Visual Studio 16 2019
- cuda >= 11.2 (when WITH_GPU=ON)
- cudnn >= 8.0 (when WITH_GPU=ON)
## 2. Download the FastDeploy Windows 10 C++ SDK
<div id="Download"></div>
### 2.1 Download the prebuilt library, or build the latest SDK from source
The prebuilt FastDeploy Windows 10 C++ SDK can be downloaded from the link below; the SDK already contains the examples code.
```text
https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-0.2.1.zip
```
To build from source, refer to: [build_and_install](../build_and_install)
### 2.2 Prepare the model files and a test image
The model files and test image can be downloaded from the links below (extract the archives after downloading):
```text
https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz # (extract after downloading)
https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
```
## 3. Ways to use the C++ SDK on Windows
### 3.1 Option 1: use the C++ SDK from the command line
<div id="CommandLine"></div>
#### 3.1.1 Build PPYOLOE on Windows
From the Windows Start menu, open the `x64 Native Tools Command Prompt for VS 2019` tool and cd into the ppyoloe demo directory:
```bat
cd fastdeploy-win-x64-gpu-0.2.1\examples\vision\detection\paddledetection\cpp
```
```bat
mkdir build && cd build
cmake .. -G "Visual Studio 16 2019" -A x64 -DFASTDEPLOY_INSTALL_DIR=%cd%\..\..\..\..\..\..\..\fastdeploy-win-x64-gpu-0.2.1 -DCUDA_DIRECTORY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2"
```
Then run:
```bat
msbuild infer_demo.sln /m:4 /p:Configuration=Release /p:Platform=x64
```
#### 3.1.2 Run the demo
```bat
cd Release
infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 0 # CPU
infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 1 # GPU
infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 2 # GPU + TensorRT
```
Note: for how to configure the dependency libraries required by the exe at runtime, refer to the section [Multiple ways to configure the exe runtime dependencies](#CommandLineDeps)
### 3.2 Option 2: use the C++ SDK from a Visual Studio 2019 sln project
This section is for non-CMake users and explains how to create an sln project in Visual Studio 2019 that uses the FastDeploy C++ SDK. CMake users can go straight to the next section. Special thanks to "梦醒南天" for the tutorial on using FastDeploy: [How to deploy PaddleDetection object detection models with FastDeploy C++ on Windows](https://www.bilibili.com/read/cv18807232)
<div id="VisualStudio2019Sln"></div>
#### 3.2.1 Step 1: create an sln project in Visual Studio 2019
<div id="VisualStudio2019Sln1"></div>
(1) Open Visual Studio 2019 and click "Create a new project" -> "Console App" to create a new sln project.
![image](https://user-images.githubusercontent.com/31974251/192813386-cf9a93e0-ee42-42b3-b8bf-d03ae7171d4e.png)
![image](https://user-images.githubusercontent.com/31974251/192816516-a4965b9c-21c9-4a01-bbb2-c648a8256fc9.png)
(2) Click "Create" to get an empty sln project. We then copy the infer_ppyoloe code from examples directly into it.
![image](https://user-images.githubusercontent.com/31974251/192817382-643c8ca2-1f2a-412e-954e-576c22b4ea62.png)
#### 3.2.2 Step 2: copy the code of infer_ppyoloe.cc from examples into the project
<div id="VisualStudio2019Sln2"></div>
(1) Copy the code of infer_ppyoloe.cc from examples into the project, simply replacing the generated source file. The code is located at:
```bat
fastdeploy-win-x64-gpu-0.2.1\examples\vision\detection\paddledetection\cpp
```
![image](https://user-images.githubusercontent.com/31974251/192818456-21ca846c-ab52-4001-96d2-77c8174bff6b.png)
#### 3.2.3 Step 3: switch the project configuration to "Release x64"
<div id="VisualStudio2019Sln3"></div>
![image](https://user-images.githubusercontent.com/31974251/192818918-98d7a54c-4a60-4760-a3cb-ecacc38b7e7a.png)
#### 3.2.4 Step 4: configure the header include paths
<div id="VisualStudio2019Sln4"></div>
(1) Configure the header include paths: right-click the project and click "Properties" in the context menu.
![image](https://user-images.githubusercontent.com/31974251/192820573-23096aea-046c-4bb4-9929-c412718805cb.png)
(2) In the property pages, go to C/C++ -> General -> Additional Include Directories and add the fastdeploy and opencv header paths. For example:
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\include
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv-win-x64-3.4.16\build\include
```
Note: for SDKs built from the latest source, or versions >0.2.1, the dependency directory layout has changed, so the opencv path needs to be adjusted accordingly. For example:
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv\build\include
```
![image](https://user-images.githubusercontent.com/31974251/192824445-978c06ed-cc14-4d6a-8ccf-d4594ca11533.png)
Adjust the paths according to the actual location of your SDK.
#### 3.2.5 Step 5: configure the lib paths and add library files
<div id="VisualStudio2019Sln5"></div>
(1) In the property pages, go to Linker -> General -> Additional Library Directories and add the fastdeploy and opencv lib paths. For example:
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\lib
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv-win-x64-3.4.16\build\x64\vc15\lib
```
Note: for SDKs built from the latest source, or versions >0.2.1, the dependency directory layout has changed, so the opencv path needs to be adjusted accordingly. For example:
```bat
D:\qiuyanjun\fastdeploy_build\built\fastdeploy-win-x64-gpu-0.2.1\third_libs\install\opencv\build\x64\vc15\lib
```
![image](https://user-images.githubusercontent.com/31974251/192826130-fe28791f-317c-4e66-a6a5-133e60b726f0.png)
(2) Add the library files: only fastdeploy.lib and opencv_world3416.lib are needed.
![image](https://user-images.githubusercontent.com/31974251/192826884-44fc84a1-c57a-45f1-8ee2-30b7eaa3dce9.png)
#### 3.2.6 Step 6: build the project and run it to get results
<div id="VisualStudio2019Sln6"></div>
(1) In the menu bar, click "Build" -> "Build Solution".
![image](https://user-images.githubusercontent.com/31974251/192827608-beb53685-2f94-44dc-aa28-49b09a4ab864.png)
![image](https://user-images.githubusercontent.com/31974251/192827842-1f05d435-8a3e-492b-a3b7-d5e88f85f814.png)
After a successful build, the exe can be found at:
```bat
D:\qiuyanjun\fastdeploy_test\infer_ppyoloe\x64\Release\infer_ppyoloe.exe
```
(2) Run the executable to get the inference result. First copy all required dlls into the exe directory, and also download, extract, and copy the ppyoloe model files and the test image into that directory. Note: for how to configure the runtime dependency libraries, refer to the section [Multiple ways to configure the exe runtime dependencies](#CommandLineDeps)
![image](https://user-images.githubusercontent.com/31974251/192829545-3ea36bfc-9a54-492b-984b-2d5d39094d47.png)
### 3.3 Option 3: use the C++ SDK from a Visual Studio 2019 CMake project
<div id="VisualStudio2019"></div>
This section is for CMake users and explains how to create a CMake project in Visual Studio 2019 that uses the FastDeploy C++ SDK.
#### 3.3.1 Step 1: create a "CMake" project in Visual Studio 2019
<div id="VisualStudio20191"></div>
(1) Open Visual Studio 2019 and click "Create a new project" -> "CMake Project" to create a CMake project. PPYOLOE is used as the example to show how to use the FastDeploy C++ SDK in the Visual Studio 2019 IDE.
![image](https://user-images.githubusercontent.com/31974251/192143543-9f29e4cb-2307-45ca-a61a-bcfba5df19ff.png)
![image](https://user-images.githubusercontent.com/31974251/192143640-39e79c65-8b50-4254-8da6-baa21bb23e3c.png)
![image](https://user-images.githubusercontent.com/31974251/192143713-be2e6490-4cab-4151-8463-8c367dbc451a.png)
(2) After opening the project, you will find that Visual Studio 2019 has already generated some basic files, including CMakeLists.txt. The infer_ppyoloe.h header is not actually needed here and can simply be deleted.
![image](https://user-images.githubusercontent.com/31974251/192143930-db1655c2-66ee-448c-82cb-0103ca1ca2a0.png)
#### 3.3.2 Step 2: configure the FastDeploy C++ SDK in CMakeLists
<div id="VisualStudio20192"></div>
(1) After the project is created, add the infer_ppyoloe inference source code and modify CMakeLists.txt as follows:
![image](https://user-images.githubusercontent.com/31974251/192144782-79bccf8f-65d0-4f22-9f41-81751c530319.png)
(2) The code of infer_ppyoloe.cpp can be copied directly from the examples:
- [examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc](../../../examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc)
(3) CMakeLists.txt mainly configures the path of the FastDeploy C++ SDK; for a GPU SDK, CUDA_DIRECTORY also needs to point to the CUDA installation path. The CMakeLists.txt configuration is as follows:
```cmake
project(infer_ppyoloe_demo C CXX)
cmake_minimum_required(VERSION 3.12)
# Only support "Release" mode now
set(CMAKE_BUILD_TYPE "Release")
# Set FastDeploy install dir
set(FASTDEPLOY_INSTALL_DIR "D:/qiuyanjun/fastdeploy-win-x64-gpu-0.2.1"
CACHE PATH "Path to downloaded or built fastdeploy sdk.")
# Set CUDA_DIRECTORY (CUDA 11.x) for GPU SDK
set(CUDA_DIRECTORY "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.7"
CACHE PATH "Path to installed CUDA Toolkit.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe.cpp)
target_link_libraries(infer_ppyoloe_demo ${FASTDEPLOY_LIBS})
# Optional: install all DLLs to binary dir.
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR}/Release)
```
Note: the `install_fastdeploy_libraries` function is only available in SDKs built from the latest source or versions >0.2.1.
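If your SDK is older and does not provide this function, a possible workaround (a sketch only, not an official SDK interface) is to copy the DLL directories yourself with a post-build step:
```cmake
# Sketch: copy runtime DLLs next to the exe for SDKs without install_fastdeploy_libraries.
# The third_libs sub-directories to copy depend on your SDK version and enabled backends.
add_custom_command(TARGET infer_ppyoloe_demo POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          ${FASTDEPLOY_INSTALL_DIR}/lib $<TARGET_FILE_DIR:infer_ppyoloe_demo>
  COMMAND ${CMAKE_COMMAND} -E copy_directory
          ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib $<TARGET_FILE_DIR:infer_ppyoloe_demo>)
```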
#### 3.3.3 Step 3: generate the project cache and modify the CMakeSettings.json configuration
<div id="VisualStudio20193"></div>
(1) Right-click "CMakeLists.txt" and click "Generate Cache":
![image](https://user-images.githubusercontent.com/31974251/192145349-c78b110a-0e41-4ee5-8942-3bf70bd94a75.png)
The cache is generated successfully, but because the project opens in Debug mode by default, the exe and cache are still placed under the Debug paths. Change the CMake settings to Release first.
(2) Right-click "CMakeLists.txt" -> "CMake Settings for infer_ppyoloe_demo" to open the CMakeSettings.json panel, and change the Debug configuration to Release.
![image](https://user-images.githubusercontent.com/31974251/192145242-01d37b44-e2fa-47df-82c1-c11c2ccbff99.png)
Also set the CMake generator to "Visual Studio 16 2019 Win64".
![image](https://user-images.githubusercontent.com/31974251/192147961-ac46d0f6-7349-4126-a123-914af2b63d95.jpg)
(3) Click save to persist the CMake cache and switch to the Release configuration:
![image](https://user-images.githubusercontent.com/31974251/192145974-b5a63341-9143-49a2-8bfe-94ac641b1670.png)
(4) (4.1) Right-click "CMakeLists.txt" -> "CMake Cache (x64-Release Only)" -> "Delete Cache"; (4.2) click "CMakeLists.txt" -> "Generate Cache"; (4.3) if the delete-cache option in step (4.1) is greyed out, click "CMakeLists.txt" -> "Build" directly; if the build fails, repeat steps (4.1) and (4.2).
![image](https://user-images.githubusercontent.com/31974251/192146394-51fbf2b8-1cba-41ca-bb45-5f26890f64ce.jpg)
Finally, you can see that the CMake cache has been generated successfully in Release mode.
![image](https://user-images.githubusercontent.com/31974251/192146239-a1eacd9e-034d-4373-a262-65b18ce25b87.png)
#### 3.3.4 Step 4: build the executable and run it to get results
<div id="VisualStudio20194"></div>
(1) Click "CMakeLists.txt" -> "Build". You will see that infer_ppyoloe_demo.exe has been built successfully and saved under the `out/build/x64-Release/Release` directory.
![image](https://user-images.githubusercontent.com/31974251/192146852-c64d2252-8c8f-4309-a950-908a5cb258b8.png)
(2) Run the executable to get the inference result. First, all dlls need to be copied into the exe directory; here we can add the following command to CMakeLists.txt to install all FastDeploy dlls into a specified directory. Note that this only works with SDKs built from the latest source or versions >0.2.1. For other ways to configure the dependencies, refer to the section [Multiple ways to configure the exe runtime dependencies](#CommandLineDeps)
```cmake
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR}/Release)
```
(3) The ppyoloe model files and the test image also need to be downloaded, extracted, and copied into the exe directory. After preparation, the directory structure looks like this:
![image](https://user-images.githubusercontent.com/31974251/192147505-054edb77-564b-405e-89ee-fd0d2e413e78.png)
(4) Finally, run the following command to get the inference result:
```bat
D:\xxxinfer_ppyoloe\out\build\x64-Release\Release>infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 0
[INFO] fastdeploy/runtime.cc(304)::fastdeploy::Runtime::Init Runtime initialized with Backend::OPENVINO in Device::CPU.
DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
415.047180,89.311569, 506.009613, 283.863098, 0.950423, 0
163.665710,81.914932, 198.585342, 166.760895, 0.896433, 0
581.788635,113.027618, 612.623474, 198.521713, 0.842596, 0
267.217224,89.777306, 298.796051, 169.361526, 0.837951, 0
......
153.301407,123.233757, 177.130539, 164.558350, 0.066697, 60
505.887604,140.919601, 523.167236, 151.875336, 0.084912, 67
Visualized result saved in ./vis_result.jpg
```
Open the saved image to view the visualized result:
<div align="center">
<img src="https://user-images.githubusercontent.com/19339784/184326520-7075e907-10ed-4fad-93f8-52d0e35d4964.jpg", width=480px, height=320px />
</div>

View File

@@ -93,8 +93,9 @@ Install the released versionLatest 1.0.3 for now, Android is 1.0.3
| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.3.tgz) | clang++ 10.0.0|
| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.3.tgz) | clang++ 13.0.0 |
| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.3.tgz) | gcc 6.3 |
| Android armv7&v8 | [fastdeploy-android-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.3-shared.tgz) | NDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.3-shared.tgz) | contains Text API, such as FastTokenizer and UIENDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.3-shared.tgz) | CV API, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.3-shared.tgz) | contains Text API, such as FastTokenizer and UIE, CV API, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-only-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-only-1.0.3-shared.tgz) | only contains Text API, such as FastTokenizer and UIE, NDK 25, clang++, does not contain CV API, support arm64-v8a and armeabi-v7a |
## Java SDK
@@ -102,8 +103,8 @@ Install the released versionAndroid is 1.0.3 pre-release
| Platform | File | Description |
| :--- | :--- | :---- |
| Android Java SDK | [fastdeploy-android-sdk-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.3.aar) | NDK 20, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.3.aar) | contains Text API, such as FastTokenizer and UI, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.3.aar) | CV API, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.3.aar) | contains Text API, such as FastTokenizer and UIE, CV API, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
Install the Develop version (Nightly build)
@@ -114,7 +115,8 @@ Install the Develop versionNightly build
| Mac OSX x64 | [fastdeploy-osx-arm64-0.0.0.tgz](https://bj.bcebos.com/fastdeploy/dev/cpp/fastdeploy-osx-arm64-0.0.0.tgz) | - |
| Mac OSX arm64 | [fastdeploy-osx-arm64-0.0.0.tgz](https://fastdeploy.bj.bcebos.com/dev/cpp/fastdeploy-osx-arm64-0.0.0.tgz) | clang++ 13.0.0 to compile |
| Linux aarch64 | - | - |
| Android armv7&v8 | [fastdeploy-android-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-0.0.0-shared.tgz) | NDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-with-text-0.0.0-shared.tgz) | contains Text API, such as FastTokenizer and UIENDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android Java SDK | [fastdeploy-android-sdk-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-0.0.0.aar) | NDK 20, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-with-text-0.0.0.aar) | contains Text API, such as FastTokenizer and UI, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
| Android armv7&v8 | [fastdeploy-android-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-0.0.0-shared.tgz) | CV API, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-with-text-0.0.0-shared.tgz) | contains Text API, such as FastTokenizer and UIE, CV API, such as OpenCV, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
| Android armv7&v8 | [fastdeploy-android-with-text-only-0.0.0-shared.tgz](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-with-text-only-0.0.0-shared.tgz) | only contains Text API, such as FastTokenizer and UIENDK 25, clang++, does not contain CV API, support arm64-v8a and armeabi-v7a |
| Android Java SDK | [fastdeploy-android-sdk-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-0.0.0.aar) | CV API, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
| Android Java SDK | [fastdeploy-android-sdk-with-text-0.0.0.aar](https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-sdk-with-text-0.0.0.aar) | contains Text API, such as FastTokenizer and UIE, CV API, such as OpenCV, NDK 20, minSdkVersion 15, targetSdkVersion 28 |

View File

@@ -117,6 +117,12 @@ In end-to-end model inference, the pre-processing and post-processing phases are
## Deployment demo reference
- Deploying PaddleClas Classification Model on Huawei Ascend NPU using C++ please refer to: [PaddleClas Huawei Ascend NPU C++ Deployment Example](../../../examples/vision/classification/paddleclas/cpp/README.md)
- Deploying PaddleClas classification model on Huawei Ascend NPU using Python please refer to: [PaddleClas Huawei Ascend NPU Python Deployment Example](../../../examples/vision/classification/paddleclas/python/README.md)
| Model | C++ Example | Python Example |
| :-----------| :-------- | :--------------- |
| PaddleClas | [Ascend NPU C++ Example](../../../examples/vision/classification/paddleclas/cpp/README.md) | [Ascend NPU Python Example](../../../examples/vision/classification/paddleclas/python/README.md) |
| PaddleDetection | [Ascend NPU C++ Example](../../../examples/vision/detection/paddledetection/cpp/README.md) | [Ascend NPU Python Example](../../../examples/vision/detection/paddledetection/python/README.md) |
| PaddleSeg | [Ascend NPU C++ Example](../../../examples/vision/segmentation/paddleseg/cpp/README.md) | [Ascend NPU Python Example](../../../examples/vision/segmentation/paddleseg/python/README.md) |
| PaddleOCR | [Ascend NPU C++ Example](../../../examples/vision/ocr/PP-OCRv3/cpp/README.md) | [Ascend NPU Python Example](../../../examples/vision/ocr/PP-OCRv3/python/README.md) |
| Yolov5 | [Ascend NPU C++ Example](../../../examples/vision/detection/yolov5/cpp/README.md) | [Ascend NPU Python Example](../../../examples/vision/detection/yolov5/python/README.md) |
| Yolov6 | [Ascend NPU C++ Example](../../../examples/vision/detection/yolov6/cpp/README.md) | [Ascend NPU Python Example](../../../examples/vision/detection/yolov6/python/README.md) |
| Yolov7 | [Ascend NPU C++ Example](../../../examples/vision/detection/yolov7/cpp/README.md) | [Ascend NPU Python Example](../../../examples/vision/detection/yolov7/python/README.md) |

View File

@@ -0,0 +1,78 @@
English | [中文](../../../cn/faq/rknpu2/build.md)
# FastDeploy RKNPU2 Engine Compilation
## FastDeploy supported backends
FastDeploy currently supports the following backends on the RK platform:
| Backend | Platform | Supported model formats | Notes |
|:------------------|:---------------------|:-------|:-------------------------------------------|
| ONNX&nbsp;Runtime | RK356X <br> RK3588 | ONNX | Controlled by the compile switch `ENABLE_ORT_BACKEND` (ON/OFF). OFF by default |
| RKNPU2 | RK356X <br> RK3588 | RKNN | Controlled by the compile switch `ENABLE_RKNPU2_BACKEND` (ON/OFF). OFF by default |
## Compile FastDeploy SDK
### Compile FastDeploy C++ SDK on board side
Currently, RKNPU2 is only available on Linux. The following tutorial was completed on RK3568 (Debian 10) and RK3588 (Debian 11).
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
# If you are using the develop branch, type the following command
git checkout develop
mkdir build && cd build
cmake .. -DENABLE_ORT_BACKEND=ON \
-DENABLE_RKNPU2_BACKEND=ON \
-DENABLE_VISION=ON \
-DRKNN2_TARGET_SOC=RK3588 \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0
make -j8
make install
```
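Once `make install` finishes, the directory passed to `CMAKE_INSTALL_PREFIX` can be used like any other FastDeploy SDK. As a sketch (the paths below are assumptions for this tutorial), building one of the C++ examples against it looks like:
```bash
# Build a vision example against the SDK installed above (adjust paths to your own tree)
cd examples/vision/detection/paddledetection/cpp
mkdir build && cd build
cmake .. -DFASTDEPLOY_INSTALL_DIR=/path/to/FastDeploy/build/fastdeploy-0.0.0
make -j4
```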
### Cross-compile FastDeploy C++ SDK
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
# If you are using the develop branch, type the following command
git checkout develop
mkdir build && cd build
cmake .. -DCMAKE_C_COMPILER=/home/zbc/opt/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc \
-DCMAKE_CXX_COMPILER=/home/zbc/opt/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-g++ \
-DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DTARGET_ABI=arm64 \
-DENABLE_ORT_BACKEND=OFF \
-DENABLE_RKNPU2_BACKEND=ON \
-DENABLE_VISION=ON \
-DRKNN2_TARGET_SOC=RK3588 \
-DENABLE_FLYCV=ON \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0
make -j8
make install
```
### Compile the Python SDK on the board
Currently, RKNPU2 is only available on Linux. The following tutorial was completed on RK3568 (Debian 10) and RK3588 (Debian 11). Packaging the Python wheel depends on `wheel`, so run `pip install wheel` before compiling.
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
# If you are using the develop branch, type the following command
git checkout develop
cd python
export ENABLE_ORT_BACKEND=ON
export ENABLE_RKNPU2_BACKEND=ON
export ENABLE_VISION=ON
export RKNN2_TARGET_SOC=RK3588
python3 setup.py build
python3 setup.py bdist_wheel
cd dist
pip3 install fastdeploy_python-0.0.0-cp39-cp39-linux_aarch64.whl
```
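A quick way to confirm the wheel was installed correctly (a minimal check; no inference is run):
```bash
python3 -c "import fastdeploy; print(fastdeploy.__file__)"
```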

View File

@@ -0,0 +1,92 @@
English | [中文](../../../cn/faq/rknpu2/environment.md)
# FastDeploy RKNPU2 inference environment setup
## Introduction
We need to set up the development environment before deploying models on FastDeploy. The environment setup of FastDeploy is divided into two parts: the board-side inference environment setup and the PC-side model conversion environment setup.
## Board-side inference environment setup
Based on feedback from developers, we provide two ways to set up the inference environment on the board: a one-click installation script, and command-line installation of the development board driver.
### Install via script
Most developers don't like complex command-line installation, so FastDeploy provides a one-click way to install the stable RKNN environment. Refer to the following commands to set up the board-side environment:
```bash
# Download and unzip rknpu2_device_install_1.4.0
wget https://bj.bcebos.com/fastdeploy/third_libs/rknpu2_device_install_1.4.0.zip
unzip rknpu2_device_install_1.4.0.zip
cd rknpu2_device_install_1.4.0
# RK3588 runs the following code
sudo rknn_install_rk3588.sh
# RK356X runs the following code
sudo rknn_install_rk356X.sh
```
### Install via the command line
For developers who want to try out the latest RK drivers, we provide a method to install them from scratch using the following command line.
```bash
# Install the required packages
sudo apt update -y
sudo apt install -y python3
sudo apt install -y python3-dev
sudo apt install -y python3-pip
sudo apt install -y gcc
sudo apt install -y python3-opencv
sudo apt install -y python3-numpy
sudo apt install -y cmake
# Download rknpu2
# RK3588 runs the following code
git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
sudo cp ./rknpu2/runtime/RK3588/Linux/librknn_api/aarch64/* /usr/lib
sudo cp ./rknpu2/runtime/RK3588/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
# RK356X runs the following code
git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
sudo cp ./rknpu2/runtime/RK356X/Linux/librknn_api/aarch64/* /usr/lib
sudo cp ./rknpu2/runtime/RK356X/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
```
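As an optional sanity check (the exact file names may differ between driver releases), you can confirm that the runtime library and rknn_server were copied:
```bash
ls /usr/lib | grep rknn
ls /usr/bin | grep rknn_server
```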
## Install rknn_toolkit2
There are some dependency issues when installing rknn_toolkit2. Here is the installation tutorial.
rknn_toolkit2 depends on a few specific packages, so it is recommended to create a virtual environment with conda. How to install conda is omitted here; we mainly introduce how to install rknn_toolkit2.
### Download rknn_toolkit2
rknn_toolkit2 can usually be downloaded from git
```bash
git clone https://github.com/rockchip-linux/rknn-toolkit2.git
```
### Download and install the required packages
```bash
sudo apt-get install libxslt1-dev zlib1g zlib1g-dev libglib2.0-0 \
libsm6 libgl1-mesa-glx libprotobuf-dev gcc g++
```
### Install rknn_toolkit2 environment
```bash
# Create a virtual environment (Python 3.8, to match the cp38 wheel installed below)
conda create -n rknn2 python=3.8
conda activate rknn2
# Install numpy==1.16.6 first because rknn_toolkit2 has a specific numpy dependency
pip install numpy==1.16.6
# Install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
cd ~/Download/rknn-toolkit2-master/packages
pip install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
```
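To verify the installation, the toolkit's main entry point should import cleanly inside the `rknn2` environment (a minimal check):
```bash
python -c "from rknn.api import RKNN; print('rknn_toolkit2 is ready')"
```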
## Resource links
* [RKNPU2 and rknn_toolkit2 development board resources download (password: rknn)](https://eyun.baidu.com/s/3eTDMk6Y)
## Other documents
- [RKNN model conversion document](./export.md)

View File

@@ -0,0 +1,47 @@
English | [中文](../../../cn/faq/rknpu2/issues.md)
# RKNPU2 FAQs
This document collects the common problems when using FastDeploy.
## Navigation
- [Link issues in dynamic link library](#link-issues-in-dynamic-link-library)
## Link issues in dynamic link library
### Association issue
- [Issue 870](https://github.com/PaddlePaddle/FastDeploy/issues/870)
### Problem Description
Compilation succeeds, but the following error is reported when the program is run:
```text
error while loading shared libraries: libfastdeploy.so.0.0.0: cannot open shared object file: No such file or directory
```
### Analysis
The dynamic loader reports that the library file cannot be found. By default it searches /lib and /usr/lib.
Libraries in other directories also work, but the loader needs to be told where the library files are located.
### Solutions
**Temporary solution**
This solution does not modify the system, but it only works in the current terminal and stops working once the terminal is closed.
```bash
source PathToFastDeploySDK/fastdeploy_init.sh
```
**Permanent solution**
The temporary solution is inconvenient because the command has to be retyped every time a new terminal is opened. If you do not want to rerun it each time, execute the following commands:
```bash
source PathToFastDeploySDK/fastdeploy_init.sh
sudo cp PathToFastDeploySDK/fastdeploy_libs.conf /etc/ld.so.conf.d/
sudo ldconfig
```
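You can optionally confirm that the library is now visible to the loader:
```bash
ldconfig -p | grep fastdeploy
```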
After execution, the configuration file is written to the system and the loader cache is refreshed so the system can find the library.

View File

@@ -51,7 +51,6 @@ if __name__ == '__main__':
option.use_poros_backend()
option.set_model_path(
"std_resnet50_script.pt", model_format=ModelFormat.TORCHSCRIPT)
option.is_dynamic = True
# compile
runtime = fd.Runtime(option)
runtime.compile(prewarm_datas)

View File

@@ -27,10 +27,9 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) {
}
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::ClassifyResult res;
if (!model.Predict(&im, &res)) {
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
@@ -48,10 +47,9 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) {
}
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::ClassifyResult res;
if (!model.Predict(&im, &res)) {
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
@@ -71,10 +69,9 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) {
}
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::ClassifyResult res;
if (!model.Predict(&im, &res)) {
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}

View File

@@ -44,8 +44,9 @@ args = parse_arguments()
runtime_option = build_option(args)
model = fd.vision.classification.YOLOv5Cls(
args.model, runtime_option=runtime_option)
model.postprocessor.topk = args.topk
# Predict the image classification result
im = cv2.imread(args.image)
result = model.predict(im, args.topk)
result = model.predict(im)
print(result)

View File

@@ -1,7 +1,7 @@
English | [简体中文](README_CN.md)
# PaddleDetection C++ Deployment Example
This directory provides examples that `infer_xxx.cc` fast finishes the deployment of PaddleDetection models, including PPYOLOE/PicoDet/YOLOX/YOLOv3/PPYOLO/FasterRCNN/YOLOv5/YOLOv6/YOLOv7/RTMDet on CPU/GPU and GPU accelerated by TensorRT.
This directory provides examples that `infer_xxx.cc` fast finishes the deployment of PaddleDetection models, including PPYOLOE/PicoDet/YOLOX/YOLOv3/PPYOLO/FasterRCNN/YOLOv5/YOLOv6/YOLOv7/RTMDet on CPU/GPU and GPU accelerated by TensorRT.
Before deployment, two steps require confirmation
@@ -15,13 +15,13 @@ ppyoloe is taken as an example for inference deployment
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the PPYOLOE model file and test images
# Download the PPYOLOE model file and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz
@@ -33,12 +33,16 @@ tar xvf ppyoloe_crn_l_300e_coco.tgz
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
# TensorRT Inference on GPU
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 2
# Kunlunxin XPU Inference
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 3
# Huawei Ascend Inference
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 4
```
The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md)
## PaddleDetection C++ Interface
## PaddleDetection C++ Interface
### Model Class
@@ -56,7 +60,7 @@ Loading and initializing PaddleDetection PPYOLOE model, where the format of mode
**Parameter**
> * **model_file**(str): Model file path
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path
> * **config_file**(str): Configuration file path, which is the deployment yaml file exported by PaddleDetection
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
@@ -73,7 +77,7 @@ Loading and initializing PaddleDetection PPYOLOE model, where the format of mode
> **Parameter**
>
> > * **im**: Input images in HWC or BGR format
> > * **result**: Detection result, including detection box and confidence of each box. Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for DetectionResult
> > * **result**: Detection result, including detection box and confidence of each box. Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for DetectionResult
- [Model Description](../../)
- [Python Deployment](../python)

View File

@@ -9,11 +9,11 @@ Before deployment, two steps require confirmation.
This directory provides examples that `infer_xxx.py` fast finishes the deployment of PPYOLOE/PicoDet models on CPU/GPU and GPU accelerated by TensorRT. The script is as follows
```bash
# Download deployment example code
# Download deployment example code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/paddledetection/python/
# Download the PPYOLOE model file and test images
# Download the PPYOLOE model file and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz
@@ -24,6 +24,10 @@ python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439
python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device gpu
# TensorRT inference on GPU Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.
python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device gpu --use_trt True
# Kunlunxin XPU Inference
python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device kunlunxin
# Huawei Ascend Inference
python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device ascend
```
The visualized result after running is as follows
@@ -31,7 +35,7 @@ The visualized result after running is as follows
<img src="https://user-images.githubusercontent.com/19339784/184326520-7075e907-10ed-4fad-93f8-52d0e35d4964.jpg", width=480px, height=320px />
</div>
## PaddleDetection Python Interface
## PaddleDetection Python Interface
```python
fastdeploy.vision.detection.PPYOLOE(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
@@ -52,7 +56,7 @@ PaddleDetection model loading and initialization, among which model_file and par
**Parameter**
> * **model_file**(str): Model file path
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path
> * **config_file**(str): Inference configuration yaml file path
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration)

View File

@@ -12,12 +12,12 @@ Taking the CPU inference on Linux as an example, the compilation test can be com
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the official converted yolov5 Paddle model files and test images
# Download the official converted yolov5 Paddle model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s_infer.tar
tar -xvf yolov5s_infer.tar
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
@@ -31,11 +31,13 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
./infer_paddle_demo yolov5s_infer 000000014439.jpg 2
# KunlunXin XPU inference
./infer_paddle_demo yolov5s_infer 000000014439.jpg 3
# Huawei Ascend Inference
./infer_paddle_demo yolov5s_infer 000000014439.jpg 4
```
The above steps apply to the inference of Paddle models. If you want to conduct the inference of ONNX models, follow these steps:
```bash
# 1. Download the official converted yolov5 ONNX model files and test images
# 1. Download the official converted yolov5 ONNX model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
@@ -53,7 +55,7 @@ The visualized result after running is as follows
The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
## YOLOv5 C++ Interface
## YOLOv5 C++ Interface
### YOLOv5 Class
@@ -69,7 +71,7 @@ YOLOv5 model loading and initialization, among which model_file is the exported
**Parameter**
> * **model_file**(str): Model file path
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Merely passing an empty string when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default

View File

@@ -22,17 +22,19 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
python infer.py --model yolov5s_infer --image 000000014439.jpg --device cpu
# GPU inference
python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu
# TensorRT inference on GPU
# TensorRT inference on GPU
python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu --use_trt True
# KunlunXin XPU inference
python infer.py --model yolov5s_infer --image 000000014439.jpg --device kunlunxin
# Huawei Ascend Inference
python infer.py --model yolov5s_infer --image 000000014439.jpg --device ascend
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/67993288/184309358-d803347a-8981-44b6-b589-4608021ad0f4.jpg">
## YOLOv5 Python Interface
## YOLOv5 Python Interface
```python
fastdeploy.vision.detection.YOLOv5(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
@@ -42,7 +44,7 @@ YOLOv5 model loading and initialization, among which model_file is the exported
**Parameter**
> * **model_file**(str): Model file path
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default

View File

@@ -23,6 +23,9 @@ python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --d
python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device gpu
# KunlunXin XPU inference
python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device kunlunxin
# Huawei Ascend Inference
python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device ascend
```
If you want to verify the inference of ONNX models, refer to the following command:
```bash
@@ -34,7 +37,7 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
python infer.py --model yolov6s.onnx --image 000000014439.jpg --device cpu
# GPU inference
python infer.py --model yolov6s.onnx --image 000000014439.jpg --device gpu
# TensorRT inference on GPU
# TensorRT inference on GPU
python infer.py --model yolov6s.onnx --image 000000014439.jpg --device gpu --use_trt True
```
@@ -42,7 +45,7 @@ The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/67993288/184301725-390e4abb-db2b-482d-931d-469381322626.jpg">
## YOLOv6 Python Interface
## YOLOv6 Python Interface
```python
fastdeploy.vision.detection.YOLOv6(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
@@ -52,7 +55,7 @@ YOLOv6 model loading and initialization, among which model_file is the exported
**Parameter**
> * **model_file**(str): Model file path
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default

View File

@@ -1,7 +1,7 @@
English | [简体中文](README_CN.md)
# YOLOv7 C++ Deployment Example
This directory provides examples that `infer.cc` fast finishes the deployment of YOLOv7 on CPU/GPU and GPU accelerated by TensorRT.
This directory provides examples that `infer.cc` fast finishes the deployment of YOLOv7 on CPU/GPU and GPU accelerated by TensorRT.
Before deployment, two steps require confirmation
@@ -13,7 +13,7 @@ Taking the CPU inference on Linux as an example, the compilation test can be com
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
@@ -29,10 +29,12 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
./infer_paddle_model_demo yolov7_infer 000000014439.jpg 1
# KunlunXin XPU inference
./infer_paddle_model_demo yolov7_infer 000000014439.jpg 2
# Huawei Ascend inference
./infer_paddle_model_demo yolov7_infer 000000014439.jpg 3
```
If you want to verify the inference of ONNX models, refer to the following command:
```bash
# Download the official converted yolov7 ONNX model files and test images
# Download the official converted yolov7 ONNX model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
@@ -52,7 +54,7 @@ The visualized result after running is as follows
The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md)
## YOLOv7 C++ Interface
## YOLOv7 C++ Interface
### YOLOv7 Class
@@ -68,7 +70,7 @@ YOLOv7 model loading and initialization, among which model_file is the exported
**Parameter**
> * **model_file**(str): Model file path
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Merely passing an empty string when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default
@@ -86,7 +88,7 @@ YOLOv7 model loading and initialization, among which model_file is the exported
> **Parameter**
>
> > * **im**: Input images in HWC or BGR format
> > * **result**: Detection results, including detection box and confidence of each box. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for DetectionResult
> > * **result**: Detection results, including detection box and confidence of each box. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for DetectionResult
> > * **conf_threshold**: Filtering threshold of detection box confidence
> > * **nms_iou_threshold**: iou threshold during NMS processing

View File

@@ -0,0 +1,90 @@
English | [简体中文](README_CN.md)
# YOLOv8 C++ Deployment Example
This directory provides the example that `infer.cc` fast finishes the deployment of YOLOv8 on CPU/GPU and GPU through TensorRT.
Two steps before deployment
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and samples code based on your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking the CPU inference on Linux as an example, FastDeploy version 1.0.3 or above (x.x.x>=1.0.3) is required to support this model.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# 1. Download the official converted YOLOv8 ONNX model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov8s.onnx
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# CPU inference
./infer_demo yolov8s.onnx 000000014439.jpg 0
# GPU inference
./infer_demo yolov8s.onnx 000000014439.jpg 1
# TensorRT inference on GPU
./infer_demo yolov8s.onnx 000000014439.jpg 2
```
The visualized result is as follows
<img width="640" src="https://user-images.githubusercontent.com/67993288/184309358-d803347a-8981-44b6-b589-4608021ad0f4.jpg">
The above command works for Linux or MacOS. For SDK in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
If you use Huawei Ascend NPU deployment, refer to the following document to initialize the deployment environment:
- [How to use Huawei Ascend NPU deployment](../../../../../docs/cn/faq/use_sdk_on_ascend.md)
## YOLOv8 C++ Interface
### YOLOv8
```c++
fastdeploy::vision::detection::YOLOv8(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
YOLOv8 model loading and initialization, among which model_file is the exported ONNX model format
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Merely passing an empty string when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default
#### Predict function
> ```c++
> YOLOv8::Predict(cv::Mat* im, DetectionResult* result)
> ```
>
> Model prediction interface. Input images and output detection results
>
> **Parameter**
>
> > * **im**: Input images in HWC or BGR format
> > * **result**: Detection results, including detection box and confidence of each box. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for DetectionResult.
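Putting the interface above together, a minimal usage sketch looks like the following (file names are placeholders taken from the commands earlier in this document; error handling is reduced to simple checks):
```c++
#include <iostream>
#include "fastdeploy/vision.h"

int main() {
  // Model and image paths are placeholders; see the download commands above
  auto model = fastdeploy::vision::detection::YOLOv8("yolov8s.onnx");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }
  auto im = cv::imread("000000014439.jpg");
  fastdeploy::vision::DetectionResult res;
  if (!model.Predict(&im, &res)) {  // signature as documented above
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }
  // Visualize and save the result
  auto vis_im = fastdeploy::vision::VisDetection(im, res);
  cv::imwrite("vis_result.jpg", vis_im);
  return 0;
}
```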
### Class Member Variable
#### Pre-processing Parameter
Users can modify the following preprocessing parameters based on actual needs to change the final inference and deployment results
> > * **size**(vector&lt;int&gt;): This parameter changes the resize used during preprocessing, containing two integer elements for [width, height] with default value [640, 640]
> > * **padding_value**(vector&lt;float&gt;): This parameter is used to change the padding value of images during resize, containing three floating-point elements that represent the value of three channels. Default value [114, 114, 114]
> > * **is_no_pad**(bool): Specify whether to resize the image through padding. `is_no_pad=true` represents no padding. Default `is_no_pad=false`
> > * **is_mini_pad**(bool): This parameter sets the width and height of the image after resize to the value nearest to the `size` member variable and to the point where the padded pixel size is divisible by the `stride` member variable. Default `is_mini_pad=false`
> > * **stride**(int): Used together with the `is_mini_pad` member variable. Default `stride=32`
- [Model Description](../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../docs/api/vision_results/)
- [How to switch the backend engine](../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -81,7 +81,7 @@ YOLOv8模型加载和初始化其中model_file为导出的ONNX模型格式。
> > * **size**(vector&lt;int&gt;): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[640, 640]
> > * **padding_value**(vector&lt;float&gt;): 通过此参数可以修改图片在resize时候做填充(padding)的值, 包含三个浮点型元素, 分别表示三个通道的值, 默认值为[114, 114, 114]
> > * **is_no_pad**(bool): 通过此参数让图片是否通过填充的方式进行resize, `is_no_pad=ture` 表示不使用填充的方式,默认值为`is_no_pad=false`
> > * **is_mini_pad**(bool): 通过此参数可以将resize之后图像的宽高这是为最接近`size`成员变量的值, 并且满足填充的像素大小是可以被`stride`成员变量整除的。默认值为`is_mini_pad=false`
> > * **is_mini_pad**(bool): 通过此参数可以将resize之后图像的宽高设置为最接近`size`成员变量的值, 并且满足填充的像素大小是可以被`stride`成员变量整除的。默认值为`is_mini_pad=false`
> > * **stride**(int): 配合`stris_mini_pad`成员变量使用, 默认值为`stride=32`
- [模型介绍](../../)

View File

@@ -0,0 +1,78 @@
English | [简体中文](README_CN.md)
# YOLOv8 Python Deployment Example
Two steps before deployment
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Install FastDeploy Python whl. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
This directory provides the example that `infer.py` fast finishes the deployment of YOLOv8 on CPU/GPU and GPU through TensorRT. The script is as follows
```bash
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/yolov8/python/
# Download yolov8 model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov8.onnx
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# CPU inference
python infer.py --model yolov8.onnx --image 000000014439.jpg --device cpu
# GPU inference
python infer.py --model yolov8.onnx --image 000000014439.jpg --device gpu
# TensorRT inference on GPU
python infer.py --model yolov8.onnx --image 000000014439.jpg --device gpu --use_trt True
```
The visualized result is as follows
<img width="640" src="https://user-images.githubusercontent.com/67993288/184309358-d803347a-8981-44b6-b589-4608021ad0f4.jpg">
## YOLOv8 Python Interface
```python
fastdeploy.vision.detection.YOLOv8(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
```
YOLOv8 model loading and initialization, among which model_file is the exported ONNX model format
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default
### predict function
> ```python
> YOLOv8.predict(image_data)
> ```
>
> Model prediction interface. Input images and output detection results
>
> **Parameter**
>
> > * **image_data**(np.ndarray): Input data in HWC or BGR format
> **Return**
>
> > Return the `fastdeploy.vision.DetectionResult`structure, refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for its description
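A minimal end-to-end sketch based on the interface above (model and image names are placeholders; see the download commands earlier in this document):
```python
import cv2
import fastdeploy as fd

# Load the exported ONNX model (path is a placeholder)
model = fd.vision.detection.YOLOv8("yolov8.onnx")

im = cv2.imread("000000014439.jpg")
result = model.predict(im)
print(result)

# Visualize and save the detection result
vis_im = fd.vision.vis_detection(im, result)
cv2.imwrite("vis_result.jpg", vis_im)
```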
### Class Member Property
#### Pre-processing Parameter
Users can modify the following preprocessing parameters based on actual needs to change the final inference and deployment results
> > * **size**(list[int]): This parameter changes the resize used during preprocessing, containing two integer elements for [width, height] with default value [640, 640]
> > * **padding_value**(list[float]): This parameter is used to change the padding value of images during resize, containing three floating-point elements that represent the value of three channels. Default value [114, 114, 114]
> > * **is_no_pad**(bool): Specify whether to resize the image through padding. `is_no_pad=True` represents no padding. Default `is_no_pad=False`
> > * **is_mini_pad**(bool): This parameter sets the width and height of the image after resize to the value nearest to the `size` member variable and to the point where the padded pixel size is divisible by the `stride` member variable. Default `is_mini_pad=False`
> > * **stride**(int): Used together with the `is_mini_pad` member variable. Default `stride=32`
## Other Documents
- [YOLOv8 Model Description](..)
- [YOLOv8 C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../docs/api/vision_results/)
- [How to switch the backend engine](../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -50,7 +50,7 @@ YOLOv8模型加载和初始化其中model_file为导出的ONNX模型格式
> YOLOv8.predict(image_data)
> ```
>
> 模型预测口,输入图像直接输出检测结果。
> 模型预测口,输入图像直接输出检测结果。
>
> **参数**
>

View File

@@ -0,0 +1,34 @@
English | [简体中文](README_CN.md)
# BlazeFace Ready-to-deploy Model
- The BlazeFace deployment model comes from [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection) and the [pre-trained model based on WiderFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection)
- 1The *.pdparams files provided in the [official library](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/tools) can be deployed after running [export_model.py](#export-paddle-model);
- 2Developers can also train a BlazeFace model on their own data, export it with [export_model.py](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/tools/export_model.py), and then complete the deployment.
## Export PADDLE model
Visit the [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection) GitHub repository, download and install it according to the instructions, download the `.yml` config and `.pdparams` model parameters, and use `export_model.py` to obtain the `paddle` model files (`.yml`, `.pdiparams`, `.pdmodel`).
* Download the BlazeFace model parameter files
| Network structure | Input size | Images/GPU | LR schedule | Easy/Medium/Hard Set | Inference latency (SD855) | Model size (MB) | Download | Config file |
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[Download link](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Config file](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection/blazeface_1000e.yml) |
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[Download link](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Config file](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
* Export paddle-format file
```bash
python tools/export_model.py -c configs/face_detection/blazeface_1000e.yml -o weights=blazeface_1000e.pdparams --export_serving_model=True
```
## Detailed Deployment Tutorials
- [Python Deployment](python)
- [C++ Deployment](cpp)
## Release Note
- This tutorial and related code are written based on [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection)

View File

@@ -0,0 +1,31 @@
# BlazeFace Ready-to-deploy Model
- The BlazeFace deployment model comes from [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection) and the [pre-trained model based on WiderFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection)
- 1The *.pdparams files provided in the [official library](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/tools) can be deployed after running [export_model.py](#export-paddle-model);
- 2Developers can also train a BlazeFace model on their own data, export it with [export_model.py](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/tools/export_model.py), and then complete the deployment.
## Export PADDLE model
Visit the [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection) GitHub repository, download and install it according to the instructions, download the `.yml` config and `.pdparams` model parameters, and use `export_model.py` to obtain the `paddle` model files (`.yml`, `.pdiparams`, `.pdmodel`).
* Download the BlazeFace model parameter files
| Network structure | Input size | Images/GPU | LR schedule | Easy/Medium/Hard Set | Inference latency (SD855) | Model size (MB) | Download | Config file |
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[Download link](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Config file](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection/blazeface_1000e.yml) |
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[Download link](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Config file](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
* Export the paddle-format files
```bash
python tools/export_model.py -c configs/face_detection/blazeface_1000e.yml -o weights=blazeface_1000e.pdparams --export_serving_model=True
```
## Detailed Deployment Tutorials
- [Python Deployment](python)
- [C++ Deployment](cpp)
## Release Note
- This document and the related code are written based on [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/face_detection)

View File

@@ -0,0 +1,14 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Specifies the path to the fastdeploy library after you have downloaded it
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(../../../../../FastDeploy.cmake)
# Add the FastDeploy dependency header
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add the FastDeploy library dependency
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})

View File

@@ -0,0 +1,78 @@
English | [简体中文](README_CN.md)
# BlazeFace C++ Deployment Example
This directory provides an example in which `infer.cc` quickly finishes the deployment of BlazeFace on CPU/GPU.
Before deployment, two steps require confirmation
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
Taking the CPU inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz # x.x.x >= 1.0.4
tar xvf fastdeploy-linux-x64-x.x.x.tgz # x.x.x >= 1.0.4
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x # x.x.x >= 1.0.4
make -j
# Download the official converted BlazeFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/blzeface-1000e.tgz
#Use blazeface-1000e model
# CPU inference
./infer_demo blazeface-1000e/ test_lite_face_detector_3.jpg 0
# GPU Inference
./infer_demo blazeface-1000e/ test_lite_face_detector_3.jpg 1
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/49013063/206170111-843febb6-67d6-4c46-a121-d87d003bba21.jpg">
The above commands work for Linux or MacOS. For SDK usage in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
## BlazeFace C++ Interface
### BlazeFace Class
```c++
fastdeploy::vision::facedet::BlazeFace(
const string& model_file,
const string& params_file = "",
const string& config_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::PADDLE)
```
BlazeFace model loading and initialization, where model_file is the exported Paddle model
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Pass an empty string only when the model is in ONNX format
> * **config_file**(str): Config file path. Pass an empty string only when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
> * **model_format**(ModelFormat): Model format. PADDLE format by default
#### Predict Function
> ```c++
> BlazeFace::Predict(cv::Mat& im, FaceDetectionResult* result)
> ```
>
> Model prediction interface. Input images and output detection results.
>
> **Parameter**
>
> > * **im**: Input image in HWC, BGR format
> > * **result**: Detection results, including detection box and confidence of each box. Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for FaceDetectionResult
- [Model Description](../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,77 @@
[English](README.md) | Simplified Chinese
# BlazeFace C++ Deployment Example
This directory provides an example in which `infer.cc` quickly finishes the deployment of BlazeFace on CPU/GPU.
Before deployment, two steps require confirmation
- 1. The software and hardware environment meets the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking CPU inference on Linux as an example, the compilation test can be completed by executing the following commands in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose an appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz # x.x.x >= 1.0.4
tar xvf fastdeploy-linux-x64-x.x.x.tgz # x.x.x >= 1.0.4
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x # x.x.x >= 1.0.4
make -j
# Download the official converted BlazeFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/blzeface-1000e.tgz
# Use the blazeface-1000e model
# CPU inference
./infer_demo blazeface-1000e/ test_lite_face_detector_3.jpg 0
# GPU inference
./infer_demo blazeface-1000e/ test_lite_face_detector_3.jpg 1
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/49013063/206170111-843febb6-67d6-4c46-a121-d87d003bba21.jpg">
The above commands only work for Linux or MacOS. For SDK usage in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
## BlazeFace C++ Interface
### BlazeFace Class
```c++
fastdeploy::vision::facedet::BlazeFace(
const string& model_file,
const string& params_file = "",
const string& config_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::PADDLE)
```
BlazeFace model loading and initialization, where model_file is the exported Paddle model.
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Pass an empty string only when the model is in ONNX format
> * **config_file**(str): Config file path. Pass an empty string only when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
> * **model_format**(ModelFormat): Model format. PADDLE format by default
#### Predict Function
> ```c++
> BlazeFace::Predict(cv::Mat& im, FaceDetectionResult* result)
> ```
>
> Model prediction interface. Input an image and output the detection result directly.
>
> **Parameter**
>
> > * **im**: Input image in HWC, BGR format
> > * **result**: Detection result, including the detection boxes and the confidence of each box. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of FaceDetectionResult
- [Model Description](../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,94 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
void CpuInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseCpu();
auto model = fastdeploy::vision::facedet::BlazeFace(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::FaceDetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisFaceDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseGpu();
auto model = fastdeploy::vision::facedet::BlazeFace(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::FaceDetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisFaceDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
int main(int argc, char* argv[]) {
if (argc < 4) {
std::cout << "Usage: infer_demo path/to/model path/to/image run_option, "
"e.g ./infer_model yolov5s-face.onnx ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend."
<< std::endl;
return -1;
}
if (std::atoi(argv[3]) == 0) {
CpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -0,0 +1,68 @@
English | [简体中文](README_CN.md)
# BlazeFace Python Deployment Example
Before deployment, two steps require confirmation
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
This directory provides an example in which `infer.py` quickly finishes the deployment of BlazeFace on CPU/GPU.
```bash
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/facedet/blazeface/python/
# Download BlazeFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/blazeface-1000e.tgz
# Use blazeface-1000e model
# CPU Inference
python infer.py --model blazeface-1000e/ --image test_lite_face_detector_3.jpg --device cpu
# GPU Inference
python infer.py --model blazeface-1000e/ --image test_lite_face_detector_3.jpg --device gpu
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/67993288/184301839-a29aefae-16c9-4196-bf9d-9c6cf694f02d.jpg">
## BlazeFace Python Interface
```python
fastdeploy.vision.facedet.BlazeFace(model_file, params_file=None, runtime_option=None, config_file=None, model_format=ModelFormat.PADDLE)
```
BlazeFace model loading and initialization, where model_file is the exported Paddle model
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **config_file**(str): Config file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
> * **model_format**(ModelFormat): Model format. PADDLE format by default
### predict function
> ```python
> BlazeFace.predict(input_image)
> ```
> The confidence threshold can be modified via `BlazeFace.postprocessor.conf_threshold = 0.2`; a sketch is shown after this section
>
> Model prediction interface. Input images and output detection results.
>
> **Parameter**
>
> > * **input_image**(np.ndarray): Input image in HWC, BGR format
> **Return**
>
> > Return the `fastdeploy.vision.FaceDetectionResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for its description.
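The following is a minimal sketch of the threshold adjustment mentioned above. It is illustrative only: the model directory and test image are the ones downloaded by the commands earlier, and the file names inside the directory follow the `infer.py` example in this directory.
```python
import cv2
import fastdeploy as fd

# Load the model exported to Paddle format (directory layout as used by infer.py)
model = fd.vision.facedet.BlazeFace(
    "blazeface-1000e/model.pdmodel",
    "blazeface-1000e/model.pdiparams",
    config_file="blazeface-1000e/infer_cfg.yml")

# Lower the confidence threshold before predicting, as described above
model.postprocessor.conf_threshold = 0.2

im = cv2.imread("test_lite_face_detector_3.jpg")
result = model.predict(im)  # returns a fastdeploy.vision.FaceDetectionResult
print(result)
```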
## Other Documents
- [BlazeFace Model Description](..)
- [BlazeFace C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,68 @@
[English](README.md) | Simplified Chinese
# BlazeFace Python Deployment Example
Before deployment, two steps require confirmation
- 1. The software and hardware environment meets the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Install the FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
This directory provides an example in which `infer.py` quickly finishes the deployment of BlazeFace on CPU/GPU. Run the following script to finish it
```bash
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/facedet/blazeface/python/
# Download BlazeFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/blazeface-1000e.tgz
# Use the blazeface-1000e model
# CPU inference
python infer.py --model blazeface-1000e/ --image test_lite_face_detector_3.jpg --device cpu
# GPU inference
python infer.py --model blazeface-1000e/ --image test_lite_face_detector_3.jpg --device gpu
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/67993288/184301839-a29aefae-16c9-4196-bf9d-9c6cf694f02d.jpg">
## BlazeFace Python Interface
```python
fastdeploy.vision.facedet.BlazeFace(model_file, params_file=None, runtime_option=None, config_file=None, model_format=ModelFormat.PADDLE)
```
BlazeFace model loading and initialization
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **config_file**(str): Config file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
> * **model_format**(ModelFormat): Model format. PADDLE by default
### predict function
> ```python
> BlazeFace.predict(input_image)
> ```
> The confidence threshold can be modified via `BlazeFace.postprocessor.conf_threshold = 0.2`
>
> Model prediction interface. Input an image and output the detection result directly.
>
> **Parameter**
>
> > * **input_image**(np.ndarray): Input data in HWC, BGR format
> **Return**
>
> > Return the `fastdeploy.vision.FaceDetectionResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for its description
## Other Documents
- [BlazeFace Model Description](..)
- [BlazeFace C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,58 @@
import fastdeploy as fd
import cv2
import os
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", required=True, help="Path of blazeface model dir.")
parser.add_argument(
"--image", required=True, help="Path of test image file.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
return parser.parse_args()
def build_option(args):
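# Build a RuntimeOption from the CLI flags: enable GPU when requested and optionally the TensorRT backend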
option = fd.RuntimeOption()
if args.device.lower() == "gpu":
option.use_gpu()
if args.use_trt:
option.use_trt_backend()
option.set_trt_input_shape("images", [1, 3, 640, 640])
return option
args = parse_arguments()
model_dir = args.model
model_file = os.path.join(model_dir, "model.pdmodel")
params_file = os.path.join(model_dir, "model.pdiparams")
config_file = os.path.join(model_dir, "infer_cfg.yml")
# Configure runtime and load the model
runtime_option = build_option(args)
model = fd.vision.facedet.BlazeFace(model_file, params_file, config_file, runtime_option=runtime_option)
# Predict image detection results
im = cv2.imread(args.image)
result = model.predict(im)
print(result)
# Visualization of prediction Results
vis_im = fd.vision.vis_face_detection(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")

View File

@@ -0,0 +1,25 @@
English | [简体中文](README_CN.md)
# CenterFace Ready-to-deploy Model
- The deployment of the CenterFace model is based on [CenterFace](https://github.com/Star-Clouds/CenterFace.git) and [Pre-trained Model Based on WIDER FACE](https://github.com/Star-Clouds/CenterFace.git)
- (1) The *.onnx provided by the [Official Repository](https://github.com/Star-Clouds/CenterFace.git) can be deployed directly
- (2) The CenterFace training code is not open source, so users cannot train it on their own data.
## Download Pre-trained ONNX Model
For developers' testing, models exported by CenterFace are provided below. Developers can download them directly. (The accuracy in the following table is derived from the source official repository on the WIDER FACE test set)
| Model | Size | Accuracy (Easy Set, Medium Set, Hard Set) | Note |
|:---------------------------------------------------------------- |:----- |:----- |:---- |
| [CenterFace](https://bj.bcebos.com/paddlehub/fastdeploy/CenterFace.onnx) | 7.2MB | 93.2%, 92.1%, 87.3% | This model file is sourced from [CenterFace](https://github.com/Star-Clouds/CenterFace.git), MIT license |
## Detailed Deployment Documents
- [Python Deployment](python)
- [C++ Deployment](cpp)
## Release Note
- Document and code are based on [CenterFace](https://github.com/Star-Clouds/CenterFace.git)

View File

@@ -0,0 +1,24 @@
[English](README.md) | Simplified Chinese
# CenterFace Ready-to-deploy Model
- The CenterFace deployment model comes from [CenterFace](https://github.com/Star-Clouds/CenterFace.git) and the [pre-trained model based on WIDER FACE](https://github.com/Star-Clouds/CenterFace.git)
- (1) The *.onnx provided by the [official repository](https://github.com/Star-Clouds/CenterFace.git) can be deployed directly
- (2) Since the CenterFace training code is not open source, developers cannot train CenterFace models on their own data
## Download Pre-trained ONNX Model
For developers' testing, models exported by CenterFace are provided below. Developers can download and use them directly. (The accuracy in the following table is derived from the source official repository on the WIDER FACE test set)
| Model | Size | Accuracy (Easy Set, Medium Set, Hard Set) | Note |
|:---------------------------------------------------------------- |:----- |:----- |:---- |
| [CenterFace](https://bj.bcebos.com/paddlehub/fastdeploy/CenterFace.onnx) | 7.2MB | 93.2%, 92.1%, 87.3% | This model file is sourced from [CenterFace](https://github.com/Star-Clouds/CenterFace.git), MIT license |
## Detailed Deployment Documents
- [Python Deployment](python)
- [C++ Deployment](cpp)
## Release Note
- Document and code are based on [CenterFace](https://github.com/Star-Clouds/CenterFace.git)

View File

@@ -0,0 +1,14 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Specifies the path to the fastdeploy library after you have downloaded it
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Include the FastDeploy dependency header file
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add the FastDeploy library dependency
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})

View File

@@ -0,0 +1,78 @@
English | [简体中文](README_CN.md)
# CenterFace C++ Deployment Example
This directory provides an example in which `infer.cc` quickly finishes the deployment of CenterFace on CPU/GPU and on GPU accelerated by TensorRT.
Before deployment, two steps require confirmation
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
Taking the CPU inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz # x.x.x > 1.0.4
tar xvf fastdeploy-linux-x64-x.x.x.tgz # x.x.x > 1.0.4
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x # x.x.x > 1.0.4
make -j
# Download the official converted CenterFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/CenterFace.onnx
# Use CenterFace.onnx model
# CPU inference
./infer_demo CenterFace.onnx test_lite_face_detector_3.jpg 0
# GPU inference
./infer_demo CenterFace.onnx test_lite_face_detector_3.jpg 1
# TensorRT inference on GPU
./infer_demo CenterFace.onnx test_lite_face_detector_3.jpg 2
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/44280887/215670067-e14b5205-e303-4c3a-9812-be4a81173dc6.jpg">
The above commands work for Linux or MacOS. For SDK usage in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
## CenterFace C++ Interface
### CenterFace Class
```c++
fastdeploy::vision::facedet::CenterFace(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
CenterFace model loading and initialization, among which model_file is the exported ONNX model format
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Only passing an empty string when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default
#### Predict Function
> ```c++
> CenterFace::Predict(cv::Mat* im, FaceDetectionResult* result)
> ```
>
> Model prediction interface. Input images and output detection results.
>
> **Parameter**
>
> > * **im**: Input image in HWC, BGR format
> > * **result**: Detection results, including detection box and confidence of each box. Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for FaceDetectionResult
- [Model Description](../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,77 @@
# CenterFace C++ Deployment Example
This directory provides an example in which `infer.cc` quickly finishes the deployment of CenterFace on CPU/GPU and on GPU accelerated by TensorRT.
Before deployment, two steps require confirmation
- 1. The software and hardware environment meets the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking CPU inference on Linux as an example, the compilation test can be completed by executing the following commands in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose an appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz # x.x.x > 1.0.4
tar xvf fastdeploy-linux-x64-x.x.x.tgz # x.x.x > 1.0.4
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x # x.x.x > 1.0.4
make -j
# Download the official converted CenterFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/CenterFace.onnx
# Use the CenterFace.onnx model
# CPU inference
./infer_demo CenterFace.onnx test_lite_face_detector_3.jpg 0
# GPU inference
./infer_demo CenterFace.onnx test_lite_face_detector_3.jpg 1
# TensorRT inference on GPU
./infer_demo CenterFace.onnx test_lite_face_detector_3.jpg 2
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/44280887/215670067-e14b5205-e303-4c3a-9812-be4a81173dc6.jpg">
The above commands only work for Linux or MacOS. For SDK usage in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
## CenterFace C++ Interface
### CenterFace Class
```c++
fastdeploy::vision::facedet::CenterFace(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
CenterFace model loading and initialization, where model_file is the exported ONNX model.
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Pass an empty string only when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
> * **model_format**(ModelFormat): Model format. ONNX format by default
#### Predict Function
> ```c++
> CenterFace::Predict(cv::Mat* im, FaceDetectionResult* result)
> ```
>
> Model prediction interface. Input an image and output the detection result directly.
>
> **Parameter**
>
> > * **im**: Input image in HWC, BGR format
> > * **result**: Detection result, including the detection boxes and the confidence of each box. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of FaceDetectionResult
- [Model Description](../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,105 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
void CpuInfer(const std::string& model_file, const std::string& image_file) {
auto model = fastdeploy::vision::facedet::CenterFace(model_file);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::FaceDetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisFaceDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void GpuInfer(const std::string& model_file, const std::string& image_file) {
auto option = fastdeploy::RuntimeOption();
option.UseGpu();
auto model = fastdeploy::vision::facedet::CenterFace(model_file, "", option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::FaceDetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisFaceDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void TrtInfer(const std::string& model_file, const std::string& image_file) {
auto option = fastdeploy::RuntimeOption();
option.UseGpu();
option.UseTrtBackend();
option.SetTrtInputShape("images", {1, 3, 640, 640});
auto model = fastdeploy::vision::facedet::CenterFace(model_file, "", option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::FaceDetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisFaceDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
int main(int argc, char* argv[]) {
if (argc < 4) {
std::cout << "Usage: infer_demo path/to/model path/to/image run_option, "
"e.g ./infer_model yolov5s-face.onnx ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend."
<< std::endl;
return -1;
}
if (std::atoi(argv[3]) == 0) {
CpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -0,0 +1,75 @@
English | [简体中文](README_CN.md)
# CenterFace Python Deployment Example
Before deployment, two steps require confirmation
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
This directory provides an example in which `infer.py` quickly finishes the deployment of CenterFace on CPU/GPU and on GPU accelerated by TensorRT. The script is as follows
```bash
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/facedet/CenterFace/python/
# Download CenterFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/CenterFace.onnx
# Use CenterFace.onnx model
# CPU inference
python infer.py --model CenterFace.onnx --image test_lite_face_detector_3.jpg --device cpu
# GPU inference
python infer.py --model CenterFace.onnx --image test_lite_face_detector_3.jpg --device gpu
# TensorRT inference on GPU
python infer.py --model CenterFace.onnx --image test_lite_face_detector_3.jpg --device gpu --use_trt True
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/44280887/215670067-e14b5205-e303-4c3a-9812-be4a81173dc6.jpg">
## CenterFace Python Interface
```python
fastdeploy.vision.facedet.CenterFace(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
```
CenterFace model loading and initialization, among which model_file is the exported ONNX model format
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default
### predict function
> ```python
> CenterFace.predict(image_data)
> ```
>
> Model prediction interface. Input images and output detection results.
>
> **Parameter**
>
> > * **image_data**(np.ndarray): Input data in HWC, BGR format
> **Return**
>
> > Return `fastdeploy.vision.FaceDetectionResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for its description.
### Class Member Property
#### Pre-processing Parameter
Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results
> > * **size**(list[int]): This parameter changes the size of the resize used during preprocessing, containing two integer elements for [width, height] with default value [640, 640]
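As a rough sketch only, the `size` member above might be adjusted from Python as follows. It assumes the member is exposed through the model's `preprocessor` attribute, which is not verified against this release.
```python
import cv2
import fastdeploy as fd

# Load the ONNX model downloaded by the commands above
model = fd.vision.facedet.CenterFace("CenterFace.onnx")

# Assumption: the pre-processing member `size` is reachable via `preprocessor`
model.preprocessor.size = [320, 320]  # resize target as [width, height]

im = cv2.imread("test_lite_face_detector_3.jpg")
result = model.predict(im)
print(result)
```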
## Other Documents
- [CenterFace Model Description](..)
- [CenterFace C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,74 @@
[English](README.md) | Simplified Chinese
# CenterFace Python Deployment Example
Before deployment, two steps require confirmation
- 1. The software and hardware environment meets the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Install the FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
This directory provides an example in which `infer.py` quickly finishes the deployment of CenterFace on CPU/GPU and on GPU accelerated by TensorRT. Run the following script to finish it
```bash
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/facedet/CenterFace/python/
# Download CenterFace model files and test images
wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
wget https://bj.bcebos.com/paddlehub/fastdeploy/CenterFace.onnx
# Use the CenterFace.onnx model
# CPU inference
python infer.py --model CenterFace.onnx --image test_lite_face_detector_3.jpg --device cpu
# GPU inference
python infer.py --model CenterFace.onnx --image test_lite_face_detector_3.jpg --device gpu
# TensorRT inference on GPU
python infer.py --model CenterFace.onnx --image test_lite_face_detector_3.jpg --device gpu --use_trt True
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/44280887/215670067-e14b5205-e303-4c3a-9812-be4a81173dc6.jpg">
## CenterFace Python Interface
```python
fastdeploy.vision.facedet.CenterFace(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
```
CenterFace model loading and initialization, where model_file is the exported ONNX model
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
> * **model_format**(ModelFormat): Model format. ONNX by default
### predict function
> ```python
> CenterFace.predict(image_data)
> ```
>
> Model prediction interface. Input an image and output the detection result directly.
>
> **Parameter**
>
> > * **image_data**(np.ndarray): Input data in HWC, BGR format
> **Return**
>
> > Return the `fastdeploy.vision.FaceDetectionResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for its description
### Class Member Property
#### Pre-processing Parameter
Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results
> > * **size**(list[int]): This parameter changes the resize size used during preprocessing, containing two integer elements for [width, height] with default value [640, 640]
## Other Documents
- [CenterFace Model Description](..)
- [CenterFace C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,51 @@
import fastdeploy as fd
import cv2
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", required=True, help="Path of CenterFace onnx model.")
parser.add_argument(
"--image", required=True, help="Path of test image file.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
return parser.parse_args()
def build_option(args):
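# Configure the runtime from the CLI flags; the TensorRT backend needs an explicit shape for the "images" input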
option = fd.RuntimeOption()
if args.device.lower() == "gpu":
option.use_gpu()
if args.use_trt:
option.use_trt_backend()
option.set_trt_input_shape("images", [1, 3, 640, 640])
return option
args = parse_arguments()
# Configure runtime and load the model
runtime_option = build_option(args)
model = fd.vision.facedet.CenterFace(args.model, runtime_option=runtime_option)
# Predict image detection results
im = cv2.imread(args.image)
result = model.predict(im)
print(result)
# Visualization of prediction Results
vis_im = fd.vision.vis_face_detection(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")

View File

@@ -1,19 +1,19 @@
[English](README.md) | 简体中文
# InsightFace C++部署示例
English | [简体中文](README_CN.md)
# InsightFace C++ Deployment Example
FastDeploy支持在RKNPU上部署包括ArcFace\CosFace\VPL\Partial_FC在内的InsightFace系列模型。
FastDeploy supports the deployment of InsightFace models like ArcFace\CosFace\VPL\Partial_FC on RKNPU.
本目录下提供`infer_arcface.cc`快速完成InsighFace模型包括ArcFaceCPU/RKNPU加速部署的示例。
This directory provides an example in which `infer_arcface.cc` quickly finishes the deployment of InsightFace models such as ArcFace on CPU/RKNPU.
在部署前,需确认以下两个步骤:
Two steps before deployment:
1. 软硬件环境满足要求
2. 根据开发环境下载预编译部署库或者从头编译FastDeploy仓库
1. Software and hardware should meet the requirements.
2. Download the precompiled deployment library or compile the FastDeploy repository from scratch according to your development environment.
以上步骤请参考[RK2代NPU部署库编译](../../../../../../docs/cn/build_and_install/rknpu2.md)实现
Refer to [RK2 generation NPU deployment library compilation](../../../../../../docs/cn/build_and_install/rknpu2.md) for the above steps
在本目录执行如下命令即可完成编译测试
The compilation can be completed by executing the following command in this directory.
```bash
mkdir build
@@ -24,18 +24,18 @@ tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# 下载官方转换好的ArcFace模型文件和测试图片
# Download the official converted ArcFace model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r18.onnx
wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip
unzip face_demo.zip
# CPU推理
# CPU inference
./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 0
# RKNPU推理
# RKNPU inference
./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 1
```
运行完成可视化结果如下图所示
The visualized result is as follows
<div width="700">
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184321537-860bf857-0101-4e92-a74c-48e8658d838c.JPG">
@@ -43,12 +43,12 @@ unzip face_demo.zip
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184321622-d9a494c3-72f3-47f1-97c5-8a2372de491f.JPG">
</div>
以上命令只适用于LinuxMacOS, Windows下SDK的使用方式请参考:
- [如何在Windows中使用FastDeploy C++ SDK](../../../../../../docs/cn/faq/use_sdk_on_windows.md)
The above command works for Linux or MacOS. For SDK in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../../docs/cn/faq/use_sdk_on_windows.md)
## InsightFace C++接口
## InsightFace C++ Interface
### ArcFace
### ArcFace
```c++
fastdeploy::vision::faceid::ArcFace(
@@ -58,9 +58,9 @@ fastdeploy::vision::faceid::ArcFace(
const ModelFormat& model_format = ModelFormat::ONNX)
```
ArcFace模型加载和初始化其中model_file为导出的ONNX模型格式。
ArcFace model loading and initialization, among which model_file is the exported ONNX model format
### CosFace
### CosFace
```c++
fastdeploy::vision::faceid::CosFace(
@@ -70,9 +70,9 @@ fastdeploy::vision::faceid::CosFace(
const ModelFormat& model_format = ModelFormat::ONNX)
```
CosFace模型加载和初始化其中model_file为导出的ONNX模型格式。
CosFace model loading and initialization, among which model_file is the exported ONNX model format
### PartialFC
### PartialFC
```c++
fastdeploy::vision::faceid::PartialFC(
@@ -82,9 +82,9 @@ fastdeploy::vision::faceid::PartialFC(
const ModelFormat& model_format = ModelFormat::ONNX)
```
PartialFC模型加载和初始化其中model_file为导出的ONNX模型格式。
PartialFC model loading and initialization, among which model_file is the exported ONNX model format
### VPL
### VPL
```c++
fastdeploy::vision::faceid::VPL(
@@ -94,43 +94,43 @@ fastdeploy::vision::faceid::VPL(
const ModelFormat& model_format = ModelFormat::ONNX)
```
VPL模型加载和初始化其中model_file为导出的ONNX模型格式。
**参数**
VPL model loading and initialization, among which model_file is the exported ONNX model format
**Parameter**
> * **model_file**(str): 模型文件路径
> * **params_file**(str): 参数文件路径当模型格式为ONNX时此参数传入空字符串即可
> * **runtime_option**(RuntimeOption): 后端推理配置默认为None即采用默认配置
> * **model_format**(ModelFormat): 模型格式默认为ONNX格式
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Merely passing an empty string when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default
#### Predict函数
#### Predict function
> ```c++
> ArcFace::Predict(const cv::Mat& im, FaceRecognitionResult* result)
> ```
>
> 模型预测接口,输入图像直接输出检测结果。
> Model prediction interface. Input images and output detection results
>
> **参数**
> **Parameter**
>
> > * **im**: 输入图像注意需为HWCBGR格式
> > * **result**: 检测结果,包括检测框,各个框的置信度, FaceRecognitionResult说明参考[视觉模型预测结果](../../../../../../docs/api/vision_results/)
> > * **im**: Input image in HWC, BGR format
> > * **result**: Recognition result. Refer to [Vision Model Prediction Results](../../../../../../docs/api/vision_results/) for the description of FaceRecognitionResult
### 修改预处理以及后处理的参数
预处理和后处理的参数的需要通过修改InsightFaceRecognitionPostprocessorInsightFaceRecognitionPreprocessor的成员变量来进行修改。
### Change pre-processing and post-processing parameters
Pre-processing and post-processing parameters can be changed by modifying the member variables of InsightFaceRecognitionPostprocessor and InsightFaceRecognitionPreprocessor
#### InsightFaceRecognitionPreprocessor成员变量(预处理参数)
> > * **size**(vector&lt;int&gt;): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[112, 112],
通过InsightFaceRecognitionPreprocessor::SetSize(std::vector<int>& size)来进行修改
> > * **alpha**(vector&lt;float&gt;): 预处理归一化的alpha值计算公式为`x'=x*alpha+beta`alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5],
通过InsightFaceRecognitionPreprocessor::SetAlpha(std::vector<float>& alpha)来进行修改
> > * **beta**(vector&lt;float&gt;): 预处理归一化的beta值计算公式为`x'=x*alpha+beta`beta默认为[-1.f, -1.f, -1.f],
通过InsightFaceRecognitionPreprocessor::SetBeta(std::vector<float>& beta)来进行修改
#### Member variables of InsightFaceRecognitionPreprocessor (pre-processing parameters)
> > * **size**(vector&lt;int&gt;): This parameter changes the resize size used during preprocessing, containing two integer elements for [width, height] with default value [112, 112].
Revise through InsightFaceRecognitionPreprocessor::SetSize(std::vector<int>& size)
> > * **alpha**(vector&lt;float&gt;): Alpha value for preprocessing normalization, calculated as `x'=x*alpha+beta`. Alpha defaults to [1. / 127.5, 1.f / 127.5, 1. / 127.5].
Revise through InsightFaceRecognitionPreprocessor::SetAlpha(std::vector<float>& alpha)
> > * **beta**(vector&lt;float&gt;): Beta value for preprocessing normalization, calculated as `x'=x*alpha+beta`. Beta defaults to [-1.f, -1.f, -1.f].
Revise through InsightFaceRecognitionPreprocessor::SetBeta(std::vector<float>& beta)
#### InsightFaceRecognitionPostprocessor成员变量(后处理参数)
> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化默认false,
InsightFaceRecognitionPostprocessor::SetL2Normalize(bool& l2_normalize)来进行修改
#### Member variables of InsightFaceRecognitionPostprocessor(post-processing parameters)
> > * **l2_normalize**(bool): Whether to perform l2 normalization before outputting the face vector. Default false.
Revise through InsightFaceRecognitionPostprocessor::SetL2Normalize(bool& l2_normalize)
- [模型介绍](../../../)
- [Python部署](../python)
- [视觉模型预测结果](../../../../../../docs/api/vision_results/README.md)
- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md)
- [Model Description](../../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../../docs/api/vision_results/README.md)
- [How to switch the backend engine](../../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,136 @@
[English](README.md) | Simplified Chinese
# InsightFace C++ Deployment Example
FastDeploy supports the deployment of InsightFace series models, including ArcFace\CosFace\VPL\Partial_FC, on RKNPU.
This directory provides an example in which `infer_arcface.cc` quickly finishes the deployment of InsightFace models such as ArcFace on CPU/RKNPU.
Two steps before deployment:
1. The software and hardware environment meets the requirements
2. Download the precompiled deployment library or compile the FastDeploy repository from scratch according to your development environment
Refer to [RK2 generation NPU deployment library compilation](../../../../../../docs/cn/build_and_install/rknpu2.md) for the above steps
The compilation test can be completed by executing the following commands in this directory
```bash
mkdir build
cd build
# FastDeploy version needs to be >= 1.0.3
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the official converted ArcFace model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r18.onnx
wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip
unzip face_demo.zip
# CPU inference
./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 0
# RKNPU inference
./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 1
```
The visualized result after running is as follows
<div width="700">
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184321537-860bf857-0101-4e92-a74c-48e8658d838c.JPG">
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184322004-a551e6e4-6f47-454e-95d6-f8ba2f47b516.JPG">
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184321622-d9a494c3-72f3-47f1-97c5-8a2372de491f.JPG">
</div>
The above commands only work for Linux or MacOS. For SDK usage in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../../docs/cn/faq/use_sdk_on_windows.md)
## InsightFace C++ Interface
### ArcFace Class
```c++
fastdeploy::vision::faceid::ArcFace(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
ArcFace model loading and initialization, where model_file is the exported ONNX model.
### CosFace Class
```c++
fastdeploy::vision::faceid::CosFace(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
CosFace model loading and initialization, where model_file is the exported ONNX model.
### PartialFC Class
```c++
fastdeploy::vision::faceid::PartialFC(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
PartialFC model loading and initialization, where model_file is the exported ONNX model.
### VPL Class
```c++
fastdeploy::vision::faceid::VPL(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
VPL model loading and initialization, where model_file is the exported ONNX model.
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Pass an empty string only when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
> * **model_format**(ModelFormat): Model format. ONNX format by default
#### Predict Function
> ```c++
> ArcFace::Predict(const cv::Mat& im, FaceRecognitionResult* result)
> ```
>
> Model prediction interface. Input an image and output the recognition result directly.
>
> **Parameter**
>
> > * **im**: Input image in HWC, BGR format
> > * **result**: Recognition result. Refer to [Vision Model Prediction Results](../../../../../../docs/api/vision_results/) for the description of FaceRecognitionResult
### Modify pre-processing and post-processing parameters
Pre-processing and post-processing parameters are changed by modifying the member variables of InsightFaceRecognitionPreprocessor and InsightFaceRecognitionPostprocessor.
#### Member variables of InsightFaceRecognitionPreprocessor (pre-processing parameters)
> > * **size**(vector&lt;int&gt;): This parameter changes the resize size used during preprocessing, containing two integer elements for [width, height] with default value [112, 112].
Modify through InsightFaceRecognitionPreprocessor::SetSize(std::vector<int>& size)
> > * **alpha**(vector&lt;float&gt;): Alpha value for preprocessing normalization, calculated as `x'=x*alpha+beta`. Alpha defaults to [1. / 127.5, 1.f / 127.5, 1. / 127.5].
Modify through InsightFaceRecognitionPreprocessor::SetAlpha(std::vector<float>& alpha)
> > * **beta**(vector&lt;float&gt;): Beta value for preprocessing normalization, calculated as `x'=x*alpha+beta`. Beta defaults to [-1.f, -1.f, -1.f].
Modify through InsightFaceRecognitionPreprocessor::SetBeta(std::vector<float>& beta)
#### Member variables of InsightFaceRecognitionPostprocessor (post-processing parameters)
> > * **l2_normalize**(bool): Whether to perform l2 normalization before outputting the face vector. Default false.
Modify through InsightFaceRecognitionPostprocessor::SetL2Normalize(bool& l2_normalize)
- [Model Description](../../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../../docs/api/vision_results/README.md)
- [How to switch the model inference backend engine](../../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,108 @@
English | [简体中文](README_CN.md)
# InsightFace Python Deployment Example
FastDeploy supports the deployment of InsightFace models such as ArcFace\CosFace\VPL\Partial_FC on RKNPU.
This directory provides an example in which `infer_arcface.py` quickly finishes the deployment of InsightFace models such as ArcFace on CPU/RKNPU.
Two steps before deployment:
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../../docs/cn/build_and_install/rknpu2.md)
```bash
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/faceid/insightface/python/
# Download ArcFace model files and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r100.onnx
wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip
unzip face_demo.zip
# CPU inference
python infer_arcface.py --model ms1mv3_arcface_r100.onnx \
--face face_0.jpg \
--face_positive face_1.jpg \
--face_negative face_2.jpg \
--device cpu
# GPU inference
python infer_arcface.py --model ms1mv3_arcface_r100.onnx \
--face face_0.jpg \
--face_positive face_1.jpg \
--face_negative face_2.jpg \
--device gpu
```
The visualized result is as follows
<div width="700">
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184321537-860bf857-0101-4e92-a74c-48e8658d838c.JPG">
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184322004-a551e6e4-6f47-454e-95d6-f8ba2f47b516.JPG">
<img width="220" float="left" src="https://user-images.githubusercontent.com/67993288/184321622-d9a494c3-72f3-47f1-97c5-8a2372de491f.JPG">
</div>
```bash
Prediction Done!
--- [Face 0]:FaceRecognitionResult: [Dim(512), Min(-2.309220), Max(2.372197), Mean(0.016987)]
--- [Face 1]:FaceRecognitionResult: [Dim(512), Min(-2.288258), Max(1.995104), Mean(-0.003400)]
--- [Face 2]:FaceRecognitionResult: [Dim(512), Min(-3.243411), Max(3.875866), Mean(-0.030682)]
Detect Done! Cosine 01: 0.814385, Cosine 02:-0.059388
```
## InsightFace Python interface
```python
fastdeploy.vision.faceid.ArcFace(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
fastdeploy.vision.faceid.CosFace(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
fastdeploy.vision.faceid.PartialFC(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
fastdeploy.vision.faceid.VPL(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
```
ArcFace model loading and initialization, among which model_file is the exported ONNX model format
**Parameter**
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. No need to set when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. ONNX format by default
### predict function
> ```python
> ArcFace.predict(image_data)
> ```
>
> Model prediction interface. Input images and output prediction results
>
> **Parameter**
>
> > * **image_data**(np.ndarray): Input data in HWC, BGR format
> **Return**
>
> > Return the `fastdeploy.vision.FaceRecognitionResult` structure. Refer to [Vision Model Prediction Results](../../../../../../docs/api/vision_results/) for its description
### Class Member Property
#### Pre-processing Parameter
Users can modify the following preprocessing parameters based on actual needs to change the final inference and deployment results.
#### Member Variables of AdaFacePreprocessor
The following are the member variables of AdaFacePreprocessor
> > * **size**(list[int]): This parameter changes the resize size used during preprocessing, containing two integer elements for [width, height] with default value [112, 112]
> > * **alpha**(list[float]): Alpha value for preprocessing normalization, calculated as `x'=x*alpha+beta`. Alpha defaults to [1. / 127.5, 1.f / 127.5, 1. / 127.5]
> > * **beta**(list[float]): Beta value for preprocessing normalization, calculated as `x'=x*alpha+beta`. Beta defaults to [-1.f, -1.f, -1.f]
#### Member Variables of AdaFacePostprocessor
The following are the member variables of AdaFacePostprocessor
> > * **l2_normalize**(bool): Whether to perform l2 normalization before outputting the face vector. Default false.
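The cosine scores printed in the log above can be reproduced with a short script. This is a sketch only: it assumes `FaceRecognitionResult` exposes the face vector as `embedding` and reuses the model and images downloaded by the commands above; it is not the `infer_arcface.py` shipped with this directory.
```python
import cv2
import numpy as np
import fastdeploy as fd

model = fd.vision.faceid.ArcFace("ms1mv3_arcface_r100.onnx")

def embed(path):
    # Run recognition and collect the face vector (assumed attribute: embedding)
    return np.array(model.predict(cv2.imread(path)).embedding)

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

anchor, positive, negative = embed("face_0.jpg"), embed("face_1.jpg"), embed("face_2.jpg")
print("Cosine 01:", cosine(anchor, positive))   # same identity, expected to be high
print("Cosine 02:", cosine(anchor, negative))   # different identity, expected to be low
```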
## Other Documents
- [InsightFace Model Description](..)
- [InsightFace C++ Deployment](../cpp)
- [Vision Model Prediction Results](../../../../../../docs/api/vision_results/)
- [How to switch the backend engine](../../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -75,7 +75,7 @@ ArcFace模型加载和初始化其中model_file为导出的ONNX模型格式
> ArcFace.predict(image_data)
> ```
>
> 模型预测口,输入图像直接输出检测结果。
> 模型预测口,输入图像直接输出检测结果。
>
> **参数**
>

View File

@@ -1,7 +1,7 @@
English | [简体中文](README_CN.md)
# PPOCRv2 C++ Deployment Example
This directory provides examples that `infer.cc` fast finishes the deployment of PPOCRv2 on CPU/GPU and GPU accelerated by TensorRT.
This directory provides examples that `infer.cc` fast finishes the deployment of PPOCRv2 on CPU/GPU and GPU accelerated by TensorRT.
Two steps before deployment
@@ -13,7 +13,7 @@ Taking the CPU inference on Linux as an example, the compilation test can be com
```
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
@@ -54,7 +54,7 @@ The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
## PPOCRv2 C++ Interface
## PPOCRv2 C++ Interface
### PPOCRv2 Class
@@ -98,7 +98,7 @@ The initialization of PPOCRv2, consisting of detection and recognition models (N
> > * **result**: OCR prediction results, including the position of the detection box from the detection model, the classification of the direction from the classification model, and the recognition result from the recognition model. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for OCRResult
## DBDetector C++ Interface
## DBDetector C++ Interface
### DBDetector Class
@@ -112,7 +112,7 @@ DBDetector model loading and initialization. The model is in paddle format.
**Parameter**
> * **model_file**(str): Model file path
> * **model_file**(str): Model file path
> * **params_file**(str): Parameter file path. Merely passing an empty string when the model is in ONNX format
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(ModelFormat): Model format. Paddle format by default
@@ -139,7 +139,7 @@ Users can modify the following pre-processing parameters to their needs, which a
> > * **max_side_len**(int): The long sides maximum size of the oriented view before detection. The long side will be resized to this size when exceeding the value. And the short side will be scaled in equal proportion. Default 960
> > * **det_db_thresh**(double): The binarization threshold of the prediction image from DB models. Default 0.3
> > * **det_db_box_thresh**(double): The threshold for the output box of DB models, below which the predicted box is discarded. Default 0.6
> > * **det_db_box_thresh**(double): The threshold for the output box of DB models, below which the predicted box is discarded. Default 0.6
> > * **det_db_unclip_ratio**(double): The expansion ratio of the DB model output box. Default 1.5
> > * **det_db_score_mode**(string): The way to calculate the average score of the text box in DB post-processing. Default slow, which is identical to the calculation of the polygon areas average score
> > * **use_dilation**(bool): Whether to expand the feature map from the detection. Default False

examples/vision/ocr/PP-OCRv2/cpp/infer_static_shape.cc Executable file → Normal file
View File

@@ -19,7 +19,12 @@ const char sep = '\\';
const char sep = '/';
#endif
void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model_dir, const std::string& rec_model_dir, const std::string& rec_label_file, const std::string& image_file, const fastdeploy::RuntimeOption& option) {
void InitAndInfer(const std::string& det_model_dir,
const std::string& cls_model_dir,
const std::string& rec_model_dir,
const std::string& rec_label_file,
const std::string& image_file,
const fastdeploy::RuntimeOption& option) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
@@ -33,33 +38,40 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
auto cls_option = option;
auto rec_option = option;
auto det_model = fastdeploy::vision::ocr::DBDetector(det_model_file, det_params_file, det_option);
auto cls_model = fastdeploy::vision::ocr::Classifier(cls_model_file, cls_params_file, cls_option);
auto rec_model = fastdeploy::vision::ocr::Recognizer(rec_model_file, rec_params_file, rec_label_file, rec_option);
auto det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
auto cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
auto rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label_file, rec_option);
// Users could enable static shape infer for rec model when deploy PP-OCR on hardware
// which can not support dynamic shape infer well, like Huawei Ascend series.
// Users could enable static shape infer for rec model when deploy PP-OCR on
// hardware
// which can not support dynamic shape infer well, like Huawei Ascend series.
rec_model.GetPreprocessor().SetStaticShapeInfer(true);
assert(det_model.Initialized());
assert(cls_model.Initialized());
assert(rec_model.Initialized());
// The classification model is optional, so the PP-OCR can also be connected in series as follows
// The classification model is optional, so the PP-OCR can also be connected
// in series as follows
// auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &rec_model);
auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &cls_model, &rec_model);
auto ppocr_v2 =
fastdeploy::pipeline::PPOCRv2(&det_model, &cls_model, &rec_model);
// When users enable static shape infer for rec model, the batch size of cls and rec model must to be set to 1.
// When users enable static shape infer for rec model, the batch size of cls
// and rec model must to be set to 1.
ppocr_v2.SetClsBatchSize(1);
ppocr_v2.SetRecBatchSize(1);
ppocr_v2.SetRecBatchSize(1);
if(!ppocr_v2.Initialized()){
if (!ppocr_v2.Initialized()) {
std::cerr << "Failed to initialize PP-OCR." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::OCRResult result;
if (!ppocr_v2.Predict(im, &result)) {
std::cerr << "Failed to predict." << std::endl;
@@ -92,7 +104,7 @@ int main(int argc, char* argv[]) {
int flag = std::atoi(argv[6]);
if (flag == 0) {
option.UseCpu();
option.UseCpu();
} else if (flag == 1) {
option.UseAscend();
}
@@ -102,6 +114,7 @@ int main(int argc, char* argv[]) {
std::string rec_model_dir = argv[3];
std::string rec_label_file = argv[4];
std::string test_image = argv[5];
InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file, test_image, option);
InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file,
test_image, option);
return 0;
}

View File

@@ -1,7 +1,7 @@
English | [简体中文](README_CN.md)
# PPOCRv3 C++ Deployment Example
This directory provides examples that `infer.cc` fast finishes the deployment of PPOCRv3 on CPU/GPU and GPU accelerated by TensorRT.
This directory provides examples that `infer.cc` fast finishes the deployment of PPOCRv3 on CPU/GPU and GPU accelerated by TensorRT.
Two steps before deployment
@@ -13,7 +13,7 @@ Taking the CPU inference on Linux as an example, the compilation test can be com
```
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
@@ -44,6 +44,8 @@ wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_
./infer_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 3
# KunlunXin XPU inference
./infer_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 4
# Huawei Ascend inference: use infer_static_shape_demo. If you need to predict images continuously, resize the input images to a uniform size in advance.
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 1
```
The above command works for Linux or MacOS. For SDK in Windows, refer to:

Some files were not shown because too many files have changed in this diff