Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy into new_dev

This commit is contained in:
yunyaoXYY
2023-02-27 08:30:38 +00:00
65 changed files with 2544 additions and 499 deletions

View File

@@ -131,9 +131,9 @@ endif()
if(ENABLE_RKNPU2_BACKEND)
if(RKNN2_TARGET_SOC STREQUAL "RK356X")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so)
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so)
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
else ()
message(FATAL_ERROR "RKNN2_TARGET_SOC is not set; expected values: RK356X or RK3588")
endif()

View File

@@ -36,3 +36,4 @@ else()
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
endif()
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR})

View File

@@ -1,22 +0,0 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifndef ENABLE_VISION
#define ENABLE_VISION
#endif
#ifndef ENABLE_TEXT
/* #undef ENABLE_TEXT */
#endif

View File

@@ -31,6 +31,9 @@ DECL_AND_IMPLEMENT_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(
DECL_AND_IMPLEMENT_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(OCRResult,
fd_ocr_result_wrapper,
ocr_result)
// SegmentationResult
DECL_AND_IMPLEMENT_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(
SegmentationResult, fd_segmentation_result_wrapper, segmentation_result)
// Models:
@@ -147,6 +150,12 @@ DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
PPOCRv3, fd_ppocrv3_wrapper, ppocrv3_model);
// Segmentation models
// PaddleSegModel
DECL_AND_IMPLEMENT_SEGMENTATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
PaddleSegModel, fd_paddleseg_model_wrapper, segmentation_model);
#endif
std::unique_ptr<fastdeploy::RuntimeOption>&

View File

@@ -27,6 +27,7 @@
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
#include "fastdeploy/vision/segmentation/ppseg/model.h"
#define DEFINE_RESULT_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \
std::unique_ptr<fastdeploy::vision::typename> varname; \
@@ -49,6 +50,10 @@
std::unique_ptr<fastdeploy::pipeline::typename> varname; \
} FD_C_##typename##Wrapper
#define DEFINE_SEGMENTATION_MODEL_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \
std::unique_ptr<fastdeploy::vision::segmentation::typename> varname; \
} FD_C_##typename##Wrapper
// ------------- belows are wrapper struct define --------------------- //
// Results:
@@ -63,6 +68,8 @@ DEFINE_RESULT_WRAPPER_STRUCT(DetectionResult, detection_result);
// OCRResult
DEFINE_RESULT_WRAPPER_STRUCT(OCRResult, ocr_result);
// Segmentation Result
DEFINE_RESULT_WRAPPER_STRUCT(SegmentationResult, segmentation_result);
// Models:
@@ -153,6 +160,10 @@ DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv2, ppocrv2_model);
// PPOCRv3
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv3, ppocrv3_model);
// Segmentation models
// PaddleSegModel
DEFINE_SEGMENTATION_MODEL_WRAPPER_STRUCT(PaddleSegModel, segmentation_model);
// ------------- belows are function declaration for get ptr from wrapper --------------------- //
@@ -177,6 +188,10 @@ FD_C_CheckAndConvert##typename##Wrapper( \
FD_C_CheckAndConvert##typename##Wrapper( \
FD_C_##typename##Wrapper* varname)
#define DECLARE_SEGMENTATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, varname) std::unique_ptr<fastdeploy::vision::segmentation::typename>& \
FD_C_CheckAndConvert##typename##Wrapper( \
FD_C_##typename##Wrapper* varname)
namespace fastdeploy {
@@ -194,6 +209,10 @@ DECLARE_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(DetectionResult,
DECLARE_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(OCRResult,
fd_ocr_result_wrapper);
// SegmentationResult
DECLARE_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(SegmentationResult,
fd_segmentation_result_wrapper);
// Models:
@@ -324,6 +343,12 @@ DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv2, fd_ppocrv2_wrapper
// PPOCRv3
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv3, fd_ppocrv3_wrapper);
// Segmentation models
// PaddleSegModel
DECLARE_SEGMENTATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
PaddleSegModel, fd_paddleseg_model_wrapper);
} // namespace fastdeploy
#endif
@@ -383,3 +408,11 @@ FD_C_CheckAndConvert##typename##Wrapper( \
"The pointer of " #var_wrapper_name " shouldn't be nullptr."); \
return var_wrapper_name->var_ptr_name; \
}
#define DECL_AND_IMPLEMENT_SEGMENTATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, var_wrapper_name, var_ptr_name) std::unique_ptr<fastdeploy::vision::segmentation::typename>& \
FD_C_CheckAndConvert##typename##Wrapper( \
FD_C_##typename##Wrapper* var_wrapper_name) { \
FDASSERT(var_wrapper_name != nullptr, \
"The pointer of " #var_wrapper_name " shouldn't be nullptr."); \
return var_wrapper_name->var_ptr_name; \
}

View File

@@ -19,6 +19,7 @@
#include "fastdeploy_capi/vision/classification/ppcls/model.h"
#include "fastdeploy_capi/vision/detection/ppdet/model.h"
#include "fastdeploy_capi/vision/ocr/ppocr/model.h"
#include "fastdeploy_capi/vision/segmentation/ppseg/model.h"
#include "fastdeploy_capi/vision/result.h"
#include "fastdeploy_capi/vision/visualize.h"
#endif

View File

@@ -55,10 +55,10 @@ FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
bool successful = paddleclas_model->Predict(im, classify_result.get());
if (successful) {
FD_C_ClassifyResult* res =
FD_C_ClassifyResultWrapperGetData(fd_c_classify_result_wrapper);
*fd_c_classify_result = *res;
FD_C_ClassifyResultWrapperToCResult(fd_c_classify_result_wrapper,
fd_c_classify_result);
}
FD_C_DestroyClassifyResultWrapper(fd_c_classify_result_wrapper);
return successful;
}
@@ -69,36 +69,17 @@ FD_C_Bool FD_C_PaddleClasModelWrapperInitialized(
return paddleclas_model->Initialized();
}
FD_C_ClassifyResult* FD_C_ClassifyResultToC(
fastdeploy::vision::ClassifyResult* classify_result) {
// Internal use, transfer fastdeploy::vision::ClassifyResult to
// FD_C_ClassifyResult
FD_C_ClassifyResult* fd_c_classify_result_data = new FD_C_ClassifyResult();
// copy label_ids
fd_c_classify_result_data->label_ids.size = classify_result->label_ids.size();
fd_c_classify_result_data->label_ids.data =
new int32_t[fd_c_classify_result_data->label_ids.size];
memcpy(fd_c_classify_result_data->label_ids.data,
classify_result->label_ids.data(),
sizeof(int32_t) * fd_c_classify_result_data->label_ids.size);
// copy scores
fd_c_classify_result_data->scores.size = classify_result->scores.size();
fd_c_classify_result_data->scores.data =
new float[fd_c_classify_result_data->scores.size];
memcpy(fd_c_classify_result_data->scores.data, classify_result->scores.data(),
sizeof(float) * fd_c_classify_result_data->scores.size);
fd_c_classify_result_data->type =
static_cast<FD_C_ResultType>(classify_result->type);
return fd_c_classify_result_data;
}
FD_C_Bool FD_C_PaddleClasModelWrapperBatchPredict(
FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper,
FD_C_OneDimMat imgs, FD_C_OneDimClassifyResult* results) {
std::vector<cv::Mat> imgs_vec;
std::vector<FD_C_ClassifyResultWrapper*> results_wrapper_out;
std::vector<fastdeploy::vision::ClassifyResult> results_out;
for (int i = 0; i < imgs.size; i++) {
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
FD_C_ClassifyResultWrapper* fd_classify_result_wrapper =
FD_C_CreateClassifyResultWrapper();
results_wrapper_out.push_back(fd_classify_result_wrapper);
}
auto& paddleclas_model = CHECK_AND_CONVERT_FD_TYPE(
PaddleClasModelWrapper, fd_c_paddleclas_model_wrapper);
@@ -108,9 +89,16 @@ FD_C_Bool FD_C_PaddleClasModelWrapperBatchPredict(
results->size = results_out.size();
results->data = new FD_C_ClassifyResult[results->size];
for (int i = 0; i < results_out.size(); i++) {
results->data[i] = *FD_C_ClassifyResultToC(&results_out[i]);
(*CHECK_AND_CONVERT_FD_TYPE(ClassifyResultWrapper,
results_wrapper_out[i])) =
std::move(results_out[i]);
FD_C_ClassifyResultWrapperToCResult(results_wrapper_out[i],
&results->data[i]);
}
}
for (int i = 0; i < results_out.size(); i++) {
FD_C_DestroyClassifyResultWrapper(results_wrapper_out[i]);
}
return successful;
}

View File

@@ -61,11 +61,10 @@ FD_C_Destroy##model_type##Wrapper(__fd_take FD_C_##model_type##Wrapper* wrapper_
DetectionResultWrapper, fd_c_detection_result_wrapper); \
bool successful = model->Predict(im, detection_result.get()); \
if (successful) { \
FD_C_DetectionResult* res = \
FD_C_DetectionResultWrapperGetData(fd_c_detection_result_wrapper); \
*fd_c_detection_result = *res; \
} \
return successful
FD_C_DetectionResultWrapperToCResult(fd_c_detection_result_wrapper, fd_c_detection_result); \
} \
FD_C_DestroyDetectionResultWrapper(fd_c_detection_result_wrapper); \
return successful
#define IMPLEMENT_INITIALIZED_FUNCTION(model_type, wrapper_var_name) auto& model = \
CHECK_AND_CONVERT_FD_TYPE(model_type##Wrapper, wrapper_var_name); \
@@ -73,8 +72,11 @@ return model->Initialized();
#define IMPLEMENT_BATCH_PREDICT_FUNCTION(model_type, wrapper_var_name) std::vector<cv::Mat> imgs_vec; \
std::vector<fastdeploy::vision::DetectionResult> results_out; \
std::vector<FD_C_DetectionResultWrapper*> results_wrapper_out; \
for (int i = 0; i < imgs.size; i++) { \
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i]))); \
FD_C_DetectionResultWrapper* fd_detection_result_wrapper = FD_C_CreateDetectionResultWrapper(); \
results_wrapper_out.push_back(fd_detection_result_wrapper); \
} \
auto& model = \
CHECK_AND_CONVERT_FD_TYPE(model_type##Wrapper, wrapper_var_name); \
@@ -83,9 +85,14 @@ return model->Initialized();
results->size = results_out.size(); \
results->data = new FD_C_DetectionResult[results->size]; \
for (int i = 0; i < results_out.size(); i++) { \
results->data[i] = *FD_C_DetectionResultToC(&results_out[i]); \
(*CHECK_AND_CONVERT_FD_TYPE(DetectionResultWrapper, \
results_wrapper_out[i])) = std::move(results_out[i]); \
FD_C_DetectionResultWrapperToCResult(results_wrapper_out[i], &results->data[i]); \
} \
} \
for (int i = 0; i < results_out.size(); i++) { \
FD_C_DestroyDetectionResultWrapper(results_wrapper_out[i]); \
}\
return successful;
#define DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(model_type, var_name) FD_C_##model_type##Wrapper* FD_C_Create##model_type##Wrapper(\

View File

@@ -46,67 +46,6 @@ FD_C_Bool FD_C_PPYOLOEWrapperInitialized(
IMPLEMENT_INITIALIZED_FUNCTION(PPYOLOE, fd_ppyoloe_wrapper);
}
FD_C_DetectionResult* FD_C_DetectionResultToC(
fastdeploy::vision::DetectionResult* detection_result) {
// Internal use, transfer fastdeploy::vision::DetectionResult to
// FD_C_DetectionResult
FD_C_DetectionResult* fd_c_detection_result = new FD_C_DetectionResult();
// copy boxes
const int boxes_coordinate_dim = 4;
fd_c_detection_result->boxes.size = detection_result->boxes.size();
fd_c_detection_result->boxes.data =
new FD_C_OneDimArrayFloat[fd_c_detection_result->boxes.size];
for (size_t i = 0; i < detection_result->boxes.size(); i++) {
fd_c_detection_result->boxes.data[i].size = boxes_coordinate_dim;
fd_c_detection_result->boxes.data[i].data = new float[boxes_coordinate_dim];
for (size_t j = 0; j < boxes_coordinate_dim; j++) {
fd_c_detection_result->boxes.data[i].data[j] =
detection_result->boxes[i][j];
}
}
// copy scores
fd_c_detection_result->scores.size = detection_result->scores.size();
fd_c_detection_result->scores.data =
new float[fd_c_detection_result->scores.size];
memcpy(fd_c_detection_result->scores.data, detection_result->scores.data(),
sizeof(float) * fd_c_detection_result->scores.size);
// copy label_ids
fd_c_detection_result->label_ids.size = detection_result->label_ids.size();
fd_c_detection_result->label_ids.data =
new int32_t[fd_c_detection_result->label_ids.size];
memcpy(fd_c_detection_result->label_ids.data,
detection_result->label_ids.data(),
sizeof(int32_t) * fd_c_detection_result->label_ids.size);
// copy masks
fd_c_detection_result->masks.size = detection_result->masks.size();
fd_c_detection_result->masks.data =
new FD_C_Mask[fd_c_detection_result->masks.size];
for (size_t i = 0; i < detection_result->masks.size(); i++) {
// copy data in mask
fd_c_detection_result->masks.data[i].data.size =
detection_result->masks[i].data.size();
fd_c_detection_result->masks.data[i].data.data =
new uint8_t[detection_result->masks[i].data.size()];
memcpy(fd_c_detection_result->masks.data[i].data.data,
detection_result->masks[i].data.data(),
sizeof(uint8_t) * detection_result->masks[i].data.size());
// copy shape in mask
fd_c_detection_result->masks.data[i].shape.size =
detection_result->masks[i].shape.size();
fd_c_detection_result->masks.data[i].shape.data =
new int64_t[detection_result->masks[i].shape.size()];
memcpy(fd_c_detection_result->masks.data[i].shape.data,
detection_result->masks[i].shape.data(),
sizeof(int64_t) * detection_result->masks[i].shape.size());
fd_c_detection_result->masks.data[i].type =
static_cast<FD_C_ResultType>(detection_result->masks[i].type);
}
fd_c_detection_result->contain_masks = detection_result->contain_masks;
fd_c_detection_result->type =
static_cast<FD_C_ResultType>(detection_result->type);
return fd_c_detection_result;
}
FD_C_Bool FD_C_PPYOLOEWrapperBatchPredict(
FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper, FD_C_OneDimMat imgs,
FD_C_OneDimDetectionResult* results) {

View File

@@ -353,72 +353,26 @@ FD_C_Bool FD_C_PPOCRv2WrapperPredict(FD_C_PPOCRv2Wrapper* fd_c_ppocrv2_wrapper,
bool successful = model->Predict(im, ocr_result.get());
if (successful) {
FD_C_OCRResult* res = FD_C_OCRResultWrapperGetData(fd_c_ocr_result_wrapper);
*fd_c_ocr_result = *res;
FD_C_OCRResultWrapperToCResult(fd_c_ocr_result_wrapper, fd_c_ocr_result);
}
FD_C_DestroyOCRResultWrapper(fd_c_ocr_result_wrapper);
return successful;
}
PIPELINE_DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PPOCRv2,
fd_c_ppocrv2_wrapper)
FD_C_OCRResult* FD_C_OCRResultToC(fastdeploy::vision::OCRResult* ocr_result) {
// Internal use, transfer fastdeploy::vision::OCRResult to
// FD_C_OCRResult
FD_C_OCRResult* fd_c_ocr_result = new FD_C_OCRResult();
// copy boxes
const int boxes_coordinate_dim = 8;
fd_c_ocr_result->boxes.size = ocr_result->boxes.size();
fd_c_ocr_result->boxes.data =
new FD_C_OneDimArrayInt32[fd_c_ocr_result->boxes.size];
for (size_t i = 0; i < ocr_result->boxes.size(); i++) {
fd_c_ocr_result->boxes.data[i].size = boxes_coordinate_dim;
fd_c_ocr_result->boxes.data[i].data = new int[boxes_coordinate_dim];
for (size_t j = 0; j < boxes_coordinate_dim; j++) {
fd_c_ocr_result->boxes.data[i].data[j] = ocr_result->boxes[i][j];
}
}
// copy text
fd_c_ocr_result->text.size = ocr_result->text.size();
fd_c_ocr_result->text.data = new FD_C_Cstr[fd_c_ocr_result->text.size];
for (size_t i = 0; i < ocr_result->text.size(); i++) {
fd_c_ocr_result->text.data[i].size = ocr_result->text[i].length();
fd_c_ocr_result->text.data[i].data =
new char[ocr_result->text[i].length() + 1];
strncpy(fd_c_ocr_result->text.data[i].data, ocr_result->text[i].c_str(),
ocr_result->text[i].length());
}
// copy rec_scores
fd_c_ocr_result->rec_scores.size = ocr_result->rec_scores.size();
fd_c_ocr_result->rec_scores.data =
new float[fd_c_ocr_result->rec_scores.size];
memcpy(fd_c_ocr_result->rec_scores.data, ocr_result->rec_scores.data(),
sizeof(float) * fd_c_ocr_result->rec_scores.size);
// copy cls_scores
fd_c_ocr_result->cls_scores.size = ocr_result->cls_scores.size();
fd_c_ocr_result->cls_scores.data =
new float[fd_c_ocr_result->cls_scores.size];
memcpy(fd_c_ocr_result->cls_scores.data, ocr_result->cls_scores.data(),
sizeof(float) * fd_c_ocr_result->cls_scores.size);
// copy cls_labels
fd_c_ocr_result->cls_labels.size = ocr_result->cls_labels.size();
fd_c_ocr_result->cls_labels.data =
new int32_t[fd_c_ocr_result->cls_labels.size];
memcpy(fd_c_ocr_result->cls_labels.data, ocr_result->cls_labels.data(),
sizeof(int32_t) * fd_c_ocr_result->cls_labels.size);
// copy type
fd_c_ocr_result->type = static_cast<FD_C_ResultType>(ocr_result->type);
return fd_c_ocr_result;
}
FD_C_Bool FD_C_PPOCRv2WrapperBatchPredict(
FD_C_PPOCRv2Wrapper* fd_c_ppocrv2_wrapper, FD_C_OneDimMat imgs,
FD_C_OneDimOCRResult* results) {
std::vector<cv::Mat> imgs_vec;
std::vector<FD_C_OCRResultWrapper*> results_wrapper_out;
std::vector<fastdeploy::vision::OCRResult> results_out;
for (int i = 0; i < imgs.size; i++) {
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
FD_C_OCRResultWrapper* fd_ocr_result_wrapper =
FD_C_CreateOCRResultWrapper();
results_wrapper_out.push_back(fd_ocr_result_wrapper);
}
auto& model = CHECK_AND_CONVERT_FD_TYPE(PPOCRv2Wrapper, fd_c_ppocrv2_wrapper);
bool successful = model->BatchPredict(imgs_vec, &results_out);
@@ -427,9 +381,14 @@ FD_C_Bool FD_C_PPOCRv2WrapperBatchPredict(
results->size = results_out.size();
results->data = new FD_C_OCRResult[results->size];
for (int i = 0; i < results_out.size(); i++) {
results->data[i] = *FD_C_OCRResultToC(&results_out[i]);
(*CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, results_wrapper_out[i])) =
std::move(results_out[i]);
FD_C_OCRResultWrapperToCResult(results_wrapper_out[i], &results->data[i]);
}
}
for (int i = 0; i < results_out.size(); i++) {
FD_C_DestroyOCRResultWrapper(results_wrapper_out[i]);
}
return successful;
}
@@ -468,9 +427,9 @@ FD_C_Bool FD_C_PPOCRv3WrapperPredict(FD_C_PPOCRv3Wrapper* fd_c_ppocrv3_wrapper,
bool successful = model->Predict(im, ocr_result.get());
if (successful) {
FD_C_OCRResult* res = FD_C_OCRResultWrapperGetData(fd_c_ocr_result_wrapper);
*fd_c_ocr_result = *res;
FD_C_OCRResultWrapperToCResult(fd_c_ocr_result_wrapper, fd_c_ocr_result);
}
FD_C_DestroyOCRResultWrapper(fd_c_ocr_result_wrapper);
return successful;
}
@@ -481,9 +440,13 @@ FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict(
FD_C_PPOCRv3Wrapper* fd_c_ppocrv3_wrapper, FD_C_OneDimMat imgs,
FD_C_OneDimOCRResult* results) {
std::vector<cv::Mat> imgs_vec;
std::vector<FD_C_OCRResultWrapper*> results_wrapper_out;
std::vector<fastdeploy::vision::OCRResult> results_out;
for (int i = 0; i < imgs.size; i++) {
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
FD_C_OCRResultWrapper* fd_ocr_result_wrapper =
FD_C_CreateOCRResultWrapper();
results_wrapper_out.push_back(fd_ocr_result_wrapper);
}
auto& model = CHECK_AND_CONVERT_FD_TYPE(PPOCRv3Wrapper, fd_c_ppocrv3_wrapper);
bool successful = model->BatchPredict(imgs_vec, &results_out);
@@ -492,9 +455,14 @@ FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict(
results->size = results_out.size();
results->data = new FD_C_OCRResult[results->size];
for (int i = 0; i < results_out.size(); i++) {
results->data[i] = *FD_C_OCRResultToC(&results_out[i]);
(*CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, results_wrapper_out[i])) =
std::move(results_out[i]);
FD_C_OCRResultWrapperToCResult(results_wrapper_out[i], &results->data[i]);
}
}
for (int i = 0; i < results_out.size(); i++) {
FD_C_DestroyOCRResultWrapper(results_wrapper_out[i]);
}
return successful;
}

View File

@@ -47,11 +47,11 @@ void FD_C_DestroyClassifyResult(
delete fd_c_classify_result;
}
FD_C_ClassifyResult* FD_C_ClassifyResultWrapperGetData(
__fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
void FD_C_ClassifyResultWrapperToCResult(
__fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper,
__fd_keep FD_C_ClassifyResult* fd_c_classify_result_data) {
auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
ClassifyResultWrapper, fd_c_classify_result_wrapper);
FD_C_ClassifyResult* fd_c_classify_result_data = new FD_C_ClassifyResult();
// copy label_ids
fd_c_classify_result_data->label_ids.size = classify_result->label_ids.size();
fd_c_classify_result_data->label_ids.data =
@@ -67,10 +67,10 @@ FD_C_ClassifyResult* FD_C_ClassifyResultWrapperGetData(
sizeof(float) * fd_c_classify_result_data->scores.size);
fd_c_classify_result_data->type =
static_cast<FD_C_ResultType>(classify_result->type);
return fd_c_classify_result_data;
return;
}
FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData(
FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromCResult(
__fd_keep FD_C_ClassifyResult* fd_c_classify_result) {
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
FD_C_CreateClassifyResultWrapper();
@@ -90,14 +90,16 @@ FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData(
return fd_c_classify_result_wrapper;
}
char* FD_C_ClassifyResultWrapperStr(
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) {
void FD_C_ClassifyResultStr(FD_C_ClassifyResult* fd_c_classify_result,
char* str_buffer) {
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
FD_C_CreateClassifyResultWrapperFromCResult(fd_c_classify_result);
auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
ClassifyResultWrapper, fd_c_classify_result_wrapper);
std::string information = classify_result->Str();
char* cstr = new char[information.length() + 1];
std::strcpy(cstr, information.c_str());
return cstr;
std::strcpy(str_buffer, information.c_str());
FD_C_DestroyClassifyResultWrapper(fd_c_classify_result_wrapper);
return;
}
// Detection Results
@@ -136,11 +138,11 @@ void FD_C_DestroyDetectionResult(
delete fd_c_detection_result;
}
FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
__fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
void FD_C_DetectionResultWrapperToCResult(
__fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper,
__fd_keep FD_C_DetectionResult* fd_c_detection_result) {
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
FD_C_DetectionResult* fd_c_detection_result = new FD_C_DetectionResult();
// copy boxes
const int boxes_coordinate_dim = 4;
fd_c_detection_result->boxes.size = detection_result->boxes.size();
@@ -194,10 +196,10 @@ FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
fd_c_detection_result->contain_masks = detection_result->contain_masks;
fd_c_detection_result->type =
static_cast<FD_C_ResultType>(detection_result->type);
return fd_c_detection_result;
return;
}
FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData(
FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromCResult(
__fd_keep FD_C_DetectionResult* fd_c_detection_result) {
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
FD_C_CreateDetectionResultWrapper();
@@ -248,14 +250,16 @@ FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData(
return fd_c_detection_result_wrapper;
}
char* FD_C_DetectionResultWrapperStr(
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) {
void FD_C_DetectionResultStr(FD_C_DetectionResult* fd_c_detection_result,
char* str_buffer) {
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
FD_C_CreateDetectionResultWrapperFromCResult(fd_c_detection_result);
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
std::string information = detection_result->Str();
char* cstr = new char[information.length() + 1];
std::strcpy(cstr, information.c_str());
return cstr;
std::strcpy(str_buffer, information.c_str());
FD_C_DestroyDetectionResultWrapper(fd_c_detection_result_wrapper);
return;
}
// OCR Results
@@ -294,11 +298,11 @@ void FD_C_DestroyOCRResult(__fd_take FD_C_OCRResult* fd_c_ocr_result) {
delete fd_c_ocr_result;
}
FD_C_OCRResult* FD_C_OCRResultWrapperGetData(
__fd_keep FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper) {
void FD_C_OCRResultWrapperToCResult(
__fd_keep FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper,
__fd_keep FD_C_OCRResult* fd_c_ocr_result) {
auto& ocr_result =
CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, fd_c_ocr_result_wrapper);
FD_C_OCRResult* fd_c_ocr_result = new FD_C_OCRResult();
// copy boxes
const int boxes_coordinate_dim = 8;
fd_c_ocr_result->boxes.size = ocr_result->boxes.size();
@@ -342,10 +346,10 @@ FD_C_OCRResult* FD_C_OCRResultWrapperGetData(
sizeof(int32_t) * fd_c_ocr_result->cls_labels.size);
// copy type
fd_c_ocr_result->type = static_cast<FD_C_ResultType>(ocr_result->type);
return fd_c_ocr_result;
return;
}
FD_C_OCRResultWrapper* FD_C_CreateOCRResultWrapperFromData(
FD_C_OCRResultWrapper* FD_C_CreateOCRResultWrapperFromCResult(
__fd_keep FD_C_OCRResult* fd_c_ocr_result) {
FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper =
FD_C_CreateOCRResultWrapper();
@@ -389,13 +393,131 @@ FD_C_OCRResultWrapper* FD_C_CreateOCRResultWrapperFromData(
return fd_c_ocr_result_wrapper;
}
char* FD_C_OCRResultWrapperStr(FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper) {
void FD_C_OCRResultStr(FD_C_OCRResult* fd_c_ocr_result, char* str_buffer) {
FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper =
FD_C_CreateOCRResultWrapperFromCResult(fd_c_ocr_result);
auto& ocr_result =
CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, fd_c_ocr_result_wrapper);
std::string information = ocr_result->Str();
char* cstr = new char[information.length() + 1];
std::strcpy(cstr, information.c_str());
return cstr;
std::strcpy(str_buffer, information.c_str());
FD_C_DestroyOCRResultWrapper(fd_c_ocr_result_wrapper);
return;
}
// Segmentation Results
FD_C_SegmentationResultWrapper* FD_C_CreateSegmentationResultWrapper() {
FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper =
new FD_C_SegmentationResultWrapper();
fd_c_segmentation_result_wrapper->segmentation_result =
std::unique_ptr<fastdeploy::vision::SegmentationResult>(
new fastdeploy::vision::SegmentationResult());
return fd_c_segmentation_result_wrapper;
}
void FD_C_DestroySegmentationResultWrapper(
__fd_take FD_C_SegmentationResultWrapper*
fd_c_segmentation_result_wrapper) {
delete fd_c_segmentation_result_wrapper;
}
void FD_C_DestroySegmentationResult(
__fd_take FD_C_SegmentationResult* fd_c_segmentation_result) {
if (fd_c_segmentation_result == nullptr) return;
// delete label_map
delete[] fd_c_segmentation_result->label_map.data;
// delete score_map
delete[] fd_c_segmentation_result->score_map.data;
// delete shape
delete[] fd_c_segmentation_result->shape.data;
delete fd_c_segmentation_result;
}
void FD_C_SegmentationResultWrapperToCResult(
__fd_keep FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper,
__fd_keep FD_C_SegmentationResult* fd_c_segmentation_result) {
auto& segmentation_result = CHECK_AND_CONVERT_FD_TYPE(
SegmentationResultWrapper, fd_c_segmentation_result_wrapper);
// copy label_map
fd_c_segmentation_result->label_map.size =
segmentation_result->label_map.size();
fd_c_segmentation_result->label_map.data =
new uint8_t[fd_c_segmentation_result->label_map.size];
memcpy(fd_c_segmentation_result->label_map.data,
segmentation_result->label_map.data(),
sizeof(uint8_t) * fd_c_segmentation_result->label_map.size);
// copy score_map
fd_c_segmentation_result->score_map.size =
segmentation_result->score_map.size();
fd_c_segmentation_result->score_map.data =
new float[fd_c_segmentation_result->score_map.size];
memcpy(fd_c_segmentation_result->score_map.data,
segmentation_result->score_map.data(),
sizeof(float) * fd_c_segmentation_result->score_map.size);
// copy shape
fd_c_segmentation_result->shape.size = segmentation_result->shape.size();
fd_c_segmentation_result->shape.data =
new int64_t[fd_c_segmentation_result->shape.size];
memcpy(fd_c_segmentation_result->shape.data,
segmentation_result->shape.data(),
sizeof(int64_t) * fd_c_segmentation_result->shape.size);
// copy contain_score_map
fd_c_segmentation_result->contain_score_map =
segmentation_result->contain_score_map;
// copy type
fd_c_segmentation_result->type =
static_cast<FD_C_ResultType>(segmentation_result->type);
return;
}
FD_C_SegmentationResultWrapper* FD_C_CreateSegmentationResultWrapperFromCResult(
__fd_keep FD_C_SegmentationResult* fd_c_segmentation_result) {
FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper =
FD_C_CreateSegmentationResultWrapper();
auto& segmentation_result = CHECK_AND_CONVERT_FD_TYPE(
SegmentationResultWrapper, fd_c_segmentation_result_wrapper);
// copy label_map
segmentation_result->label_map.resize(
fd_c_segmentation_result->label_map.size);
memcpy(segmentation_result->label_map.data(),
fd_c_segmentation_result->label_map.data,
sizeof(uint8_t) * fd_c_segmentation_result->label_map.size);
// copy score_map
segmentation_result->score_map.resize(
fd_c_segmentation_result->score_map.size);
memcpy(segmentation_result->score_map.data(),
fd_c_segmentation_result->score_map.data,
sizeof(float) * fd_c_segmentation_result->score_map.size);
// copy shape
segmentation_result->shape.resize(fd_c_segmentation_result->shape.size);
memcpy(segmentation_result->shape.data(),
fd_c_segmentation_result->shape.data,
sizeof(int64_t) * fd_c_segmentation_result->shape.size);
// copy contain_score_map
segmentation_result->contain_score_map =
fd_c_segmentation_result->contain_score_map;
// copy type
segmentation_result->type = static_cast<fastdeploy::vision::ResultType>(
fd_c_segmentation_result->type);
return fd_c_segmentation_result_wrapper;
}
void FD_C_SegmentationResultStr(
FD_C_SegmentationResult* fd_c_segmentation_result, char* str_buffer) {
FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper =
FD_C_CreateSegmentationResultWrapperFromCResult(fd_c_segmentation_result);
auto& segmentation_result = CHECK_AND_CONVERT_FD_TYPE(
SegmentationResultWrapper, fd_c_segmentation_result_wrapper);
std::string information = segmentation_result->Str();
std::strcpy(str_buffer, information.c_str());
FD_C_DestroySegmentationResultWrapper(fd_c_segmentation_result_wrapper);
return;
}
#ifdef __cplusplus

View File

@@ -20,6 +20,7 @@
typedef struct FD_C_ClassifyResultWrapper FD_C_ClassifyResultWrapper;
typedef struct FD_C_DetectionResultWrapper FD_C_DetectionResultWrapper;
typedef struct FD_C_OCRResultWrapper FD_C_OCRResultWrapper;
typedef struct FD_C_SegmentationResultWrapper FD_C_SegmentationResultWrapper;
#ifdef __cplusplus
extern "C" {
@@ -76,6 +77,20 @@ typedef struct FD_C_OneDimOCRResult {
FD_C_OCRResult* data;
} FD_C_OneDimOCRResult;
typedef struct FD_C_SegmentationResult {
FD_C_OneDimArrayUint8 label_map;
FD_C_OneDimArrayFloat score_map;
FD_C_OneDimArrayInt64 shape;
FD_C_Bool contain_score_map;
FD_C_ResultType type;
} FD_C_SegmentationResult;
typedef struct FD_C_OneDimSegmentationResult {
size_t size;
FD_C_SegmentationResult* data;
} FD_C_OneDimSegmentationResult;
// Classification Results
/** \brief Create a new FD_C_ClassifyResultWrapper object
@@ -105,11 +120,13 @@ FD_C_DestroyClassifyResult(__fd_take FD_C_ClassifyResult* fd_c_classify_result);
/** \brief Get a FD_C_ClassifyResult object from FD_C_ClassifyResultWrapper object
*
* \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object
* \return Return a pointer to FD_C_ClassifyResult object
* \param[out] fd_c_classify_result pointer to FD_C_ClassifyResult object used to store data
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResult*
FD_C_ClassifyResultWrapperGetData(
__fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_ClassifyResultWrapperToCResult(
__fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper,
__fd_keep FD_C_ClassifyResult* fd_c_classify_result);
/** \brief Create a new FD_C_ClassifyResultWrapper object from FD_C_ClassifyResult object
*
@@ -118,18 +135,19 @@ FD_C_ClassifyResultWrapperGetData(
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResultWrapper*
FD_C_CreateClassifyResultWrapperFromData(
FD_C_CreateClassifyResultWrapperFromCResult(
__fd_keep FD_C_ClassifyResult* fd_c_classify_result);
/** \brief Print ClassifyResult formatted information
*
* \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object
* \return Return a string pointer
* \param[in] fd_c_classify_result pointer to FD_C_ClassifyResult object
* \param[out] str_buffer used to store string
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give char*
FD_C_ClassifyResultWrapperStr(
__fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_ClassifyResultStr(
__fd_keep FD_C_ClassifyResult* fd_c_classify_result, char* str_buffer);
// Detection Results
@@ -160,11 +178,12 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyDetectionResult(
/** \brief Get a FD_C_DetectionResult object from FD_C_DetectionResultWrapper object
*
* \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object
* \return Return a pointer to FD_C_DetectionResult object
* \param[out] fd_c_detection_result pointer to FD_C_DetectionResult object used to store data
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResult*
FD_C_DetectionResultWrapperGetData(
__fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_DetectionResultWrapperToCResult(
__fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper,
__fd_keep FD_C_DetectionResult* fd_c_detection_result);
/** \brief Create a new FD_C_DetectionResultWrapper object from FD_C_DetectionResult object
*
@@ -173,18 +192,19 @@ FD_C_DetectionResultWrapperGetData(
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResultWrapper*
FD_C_CreateDetectionResultWrapperFromData(
FD_C_CreateDetectionResultWrapperFromCResult(
__fd_keep FD_C_DetectionResult* fd_c_detection_result);
/** \brief Print DetectionResult formatted information
*
* \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object
* \return Return a string pointer
* \param[in] fd_c_detection_result pointer to FD_C_DetectionResult object
* \param[out] str_buffer used to store string
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give char*
FD_C_DetectionResultWrapperStr(
__fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_DetectionResultStr(
__fd_keep FD_C_DetectionResult* fd_c_detection_result, char* str_buffer);
// OCR Results
@@ -216,11 +236,12 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyOCRResult(
/** \brief Get a FD_C_OCRResult object from FD_C_OCRResultWrapper object
*
* \param[in] fd_c_ocr_result_wrapper pointer to FD_C_OCRResultWrapper object
* \return Return a pointer to FD_C_OCRResult object
* \param[out] fd_c_ocr_result pointer to FD_C_OCRResult object used to store data
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_OCRResult*
FD_C_OCRResultWrapperGetData(
__fd_keep FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_OCRResultWrapperToCResult(
__fd_keep FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper,
__fd_keep FD_C_OCRResult* fd_c_ocr_result);
/** \brief Create a new FD_C_OCRResultWrapper object from FD_C_OCRResult object
*
@@ -229,18 +250,76 @@ FD_C_OCRResultWrapperGetData(
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_OCRResultWrapper*
FD_C_CreateOCRResultWrapperFromData(
FD_C_CreateOCRResultWrapperFromCResult(
__fd_keep FD_C_OCRResult* fd_c_ocr_result);
/** \brief Print OCRResult formatted information
*
* \param[in] fd_c_ocr_result_wrapper pointer to FD_C_OCRResultWrapper object
* \return Return a string pointer
* \param[in] fd_c_ocr_result pointer to FD_C_OCRResult object
* \param[out] str_buffer used to store string
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give char*
FD_C_OCRResultWrapperStr(
__fd_keep FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_OCRResultStr(
__fd_keep FD_C_OCRResult* fd_c_ocr_result, char* str_buffer);
// Segmentation Results
/** \brief Create a new FD_C_SegmentationResultWrapper object
*
* \return Return a pointer to FD_C_SegmentationResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_SegmentationResultWrapper*
FD_C_CreateSegmentationResultWrapper();
/** \brief Destroy a FD_C_SegmentationResultWrapper object
*
* \param[in] fd_c_segmentation_result_wrapper pointer to FD_C_SegmentationResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroySegmentationResultWrapper(
__fd_take FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper);
/** \brief Destroy a FD_C_SegmentationResult object
*
* \param[in] fd_c_segmentation_result pointer to FD_C_SegmentationResult object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroySegmentationResult(
__fd_take FD_C_SegmentationResult* fd_c_segmentation_result);
/** \brief Get a FD_C_SegmentationResult object from FD_C_SegmentationResultWrapper object
*
* \param[in] fd_c_segmentation_result_wrapper pointer to FD_C_SegmentationResultWrapper object
* \param[out] fd_c_segmentation_result pointer to FD_C_SegmentationResult object used to store data
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_SegmentationResultWrapperToCResult(
__fd_keep FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper,
__fd_keep FD_C_SegmentationResult* fd_c_segmentation_result);
/** \brief Create a new FD_C_SegmentationResultWrapper object from FD_C_SegmentationResult object
*
* \param[in] fd_c_segmentation_result pointer to FD_C_SegmentationResult object
* \return Return a pointer to FD_C_SegmentationResultWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_SegmentationResultWrapper*
FD_C_CreateSegmentationResultWrapperFromCResult(
__fd_keep FD_C_SegmentationResult* fd_c_segmentation_result);
/** \brief Print SegmentationResult formatted information
*
* \param[in] fd_c_segmentation_result pointer to FD_C_SegmentationResult object
* \param[out] str_buffer used to store string
*/
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_SegmentationResultStr(
__fd_keep FD_C_SegmentationResult* fd_c_segmentation_result, char* str_buffer);
#ifdef __cplusplus

View File

@@ -0,0 +1,107 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy_capi/vision/segmentation/ppseg/model.h"
#include "fastdeploy_capi/types_internal.h"
#ifdef __cplusplus
extern "C" {
#endif
FD_C_PaddleSegModelWrapper* FD_C_CreatePaddleSegModelWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper =
new FD_C_PaddleSegModelWrapper();
fd_c_paddleseg_model_wrapper->segmentation_model =
std::unique_ptr<fastdeploy::vision::segmentation::PaddleSegModel>(
new fastdeploy::vision::segmentation::PaddleSegModel(
std::string(model_file), std::string(params_file),
std::string(config_file), *runtime_option,
static_cast<fastdeploy::ModelFormat>(model_format)));
return fd_c_paddleseg_model_wrapper;
}
void FD_C_DestroyPaddleSegModelWrapper(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper) {
delete fd_c_paddleseg_model_wrapper;
}
FD_C_Bool FD_C_PaddleSegModelWrapperPredict(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper, FD_C_Mat img,
FD_C_SegmentationResult* fd_c_segmentation_result) {
cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
auto& paddleseg_model = CHECK_AND_CONVERT_FD_TYPE(
PaddleSegModelWrapper, fd_c_paddleseg_model_wrapper);
FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper =
FD_C_CreateSegmentationResultWrapper();
auto& segmentation_result = CHECK_AND_CONVERT_FD_TYPE(
SegmentationResultWrapper, fd_c_segmentation_result_wrapper);
bool successful = paddleseg_model->Predict(im, segmentation_result.get());
if (successful) {
FD_C_SegmentationResultWrapperToCResult(fd_c_segmentation_result_wrapper,
fd_c_segmentation_result);
}
FD_C_DestroySegmentationResultWrapper(fd_c_segmentation_result_wrapper);
return successful;
}
FD_C_Bool FD_C_PaddleSegModelWrapperInitialized(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper) {
auto& paddleseg_model = CHECK_AND_CONVERT_FD_TYPE(
PaddleSegModelWrapper, fd_c_paddleseg_model_wrapper);
return paddleseg_model->Initialized();
}
FD_C_Bool FD_C_PaddleSegModelWrapperBatchPredict(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper,
FD_C_OneDimMat imgs, FD_C_OneDimSegmentationResult* results) {
std::vector<cv::Mat> imgs_vec;
std::vector<FD_C_SegmentationResultWrapper*> results_wrapper_out;
std::vector<fastdeploy::vision::SegmentationResult> results_out;
for (int i = 0; i < imgs.size; i++) {
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
FD_C_SegmentationResultWrapper* fd_segmentation_result_wrapper =
FD_C_CreateSegmentationResultWrapper();
results_wrapper_out.push_back(fd_segmentation_result_wrapper);
}
auto& paddleseg_model = CHECK_AND_CONVERT_FD_TYPE(
PaddleSegModelWrapper, fd_c_paddleseg_model_wrapper);
bool successful = paddleseg_model->BatchPredict(imgs_vec, &results_out);
if (successful) {
// copy results back to FD_C_OneDimSegmentationResult
results->size = results_out.size();
results->data = new FD_C_SegmentationResult[results->size];
for (int i = 0; i < results_out.size(); i++) {
(*CHECK_AND_CONVERT_FD_TYPE(SegmentationResultWrapper,
results_wrapper_out[i])) =
std::move(results_out[i]);
FD_C_SegmentationResultWrapperToCResult(results_wrapper_out[i],
&results->data[i]);
}
}
for (int i = 0; i < results_out.size(); i++) {
FD_C_DestroySegmentationResultWrapper(results_wrapper_out[i]);
}
return successful;
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,90 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy_capi/fd_common.h"
#include "fastdeploy_capi/fd_type.h"
#include "fastdeploy_capi/runtime_option.h"
#include "fastdeploy_capi/vision/result.h"
typedef struct FD_C_PaddleSegModelWrapper FD_C_PaddleSegModelWrapper;
#ifdef __cplusplus
extern "C" {
#endif
/** \brief Create a new FD_C_PaddleSegModelWrapper object
*
* \param[in] model_file Path of model file, e.g. unet/model.pdmodel
* \param[in] params_file Path of parameter file, e.g. unet/model.pdiparams; if the model format is ONNX, this parameter will be ignored
* \param[in] config_file Path of configuration file for deployment, e.g. unet/deploy.yml
* \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`
* \param[in] model_format Model format of the loaded model, default is Paddle format
*
* \return Return a pointer to FD_C_PaddleSegModelWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PaddleSegModelWrapper*
FD_C_CreatePaddleSegModelWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format);
/** \brief Destroy a FD_C_PaddleSegModelWrapper object
*
* \param[in] fd_c_paddleseg_model_wrapper pointer to FD_C_PaddleSegModelWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyPaddleSegModelWrapper(
__fd_take FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper);
/** \brief Predict the segmentation result for an input image
*
* \param[in] fd_c_paddleseg_model_wrapper pointer to FD_C_PaddleSegModelWrapper object
* \param[in] img pointer to cv::Mat image
* \param[out] fd_c_segmentation_result pointer to FD_C_SegmentationResult object, which stores the result.
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleSegModelWrapperPredict(
__fd_keep FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper,
FD_C_Mat img, FD_C_SegmentationResult* fd_c_segmentation_result);
/** \brief Check if the model is initialized successfully
*
* \param[in] fd_c_paddleseg_model_wrapper pointer to FD_C_PaddleSegModelWrapper object
*
* \return Return a bool of value true if initialized successfully
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleSegModelWrapperInitialized(
__fd_keep FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper);
/** \brief Predict the segmentation results for a batch of input images
*
* \param[in] fd_c_paddleseg_model_wrapper pointer to FD_C_PaddleSegModelWrapper object
* \param[in] imgs The input image list, each element comes from cv::imread()
* \param[out] results The output segmentation result list
* \return true if the prediction succeeded, otherwise false
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleSegModelWrapperBatchPredict(
__fd_keep FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper,
FD_C_OneDimMat imgs,
FD_C_OneDimSegmentationResult* results);
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -26,12 +26,13 @@ FD_C_Mat FD_C_VisDetection(FD_C_Mat im,
float score_threshold, int line_size,
float font_size) {
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
FD_C_CreateDetectionResultWrapperFromData(fd_c_detection_result);
FD_C_CreateDetectionResultWrapperFromCResult(fd_c_detection_result);
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
cv::Mat result = fastdeploy::vision::VisDetection(
*(reinterpret_cast<cv::Mat*>(im)), *detection_result, score_threshold,
line_size, font_size);
FD_C_DestroyDetectionResultWrapper(fd_c_detection_result_wrapper);
return new cv::Mat(result);
}
@@ -45,12 +46,13 @@ FD_C_Mat FD_C_VisDetectionWithLabel(FD_C_Mat im,
labels_in.emplace_back(labels->data[i].data);
}
FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper =
FD_C_CreateDetectionResultWrapperFromData(fd_c_detection_result);
FD_C_CreateDetectionResultWrapperFromCResult(fd_c_detection_result);
auto& detection_result = CHECK_AND_CONVERT_FD_TYPE(
DetectionResultWrapper, fd_c_detection_result_wrapper);
cv::Mat result = fastdeploy::vision::VisDetection(
*(reinterpret_cast<cv::Mat*>(im)), *detection_result, labels_in,
score_threshold, line_size, font_size);
FD_C_DestroyDetectionResultWrapper(fd_c_detection_result_wrapper);
return new cv::Mat(result);
}
@@ -59,12 +61,13 @@ FD_C_Mat FD_C_VisClassification(FD_C_Mat im,
int top_k, float score_threshold,
float font_size) {
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
FD_C_CreateClassifyResultWrapperFromData(fd_c_classify_result);
FD_C_CreateClassifyResultWrapperFromCResult(fd_c_classify_result);
auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
ClassifyResultWrapper, fd_c_classify_result_wrapper);
cv::Mat result = fastdeploy::vision::VisClassification(
*(reinterpret_cast<cv::Mat*>(im)), *classify_result, top_k,
score_threshold, font_size);
FD_C_DestroyClassifyResultWrapper(fd_c_classify_result_wrapper);
return new cv::Mat(result);
}
@@ -77,22 +80,37 @@ FD_C_Mat FD_C_VisClassificationWithLabel(
labels_in.emplace_back(labels->data[i].data);
}
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper =
FD_C_CreateClassifyResultWrapperFromData(fd_c_classify_result);
FD_C_CreateClassifyResultWrapperFromCResult(fd_c_classify_result);
auto& classify_result = CHECK_AND_CONVERT_FD_TYPE(
ClassifyResultWrapper, fd_c_classify_result_wrapper);
cv::Mat result = fastdeploy::vision::VisClassification(
*(reinterpret_cast<cv::Mat*>(im)), *classify_result, labels_in, top_k,
score_threshold, font_size);
FD_C_DestroyClassifyResultWrapper(fd_c_classify_result_wrapper);
return new cv::Mat(result);
}
FD_C_Mat FD_C_VisOcr(FD_C_Mat im, FD_C_OCRResult* fd_c_ocr_result) {
FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper =
FD_C_CreateOCRResultWrapperFromData(fd_c_ocr_result);
FD_C_CreateOCRResultWrapperFromCResult(fd_c_ocr_result);
auto& ocr_result =
CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, fd_c_ocr_result_wrapper);
cv::Mat result = fastdeploy::vision::VisOcr(*(reinterpret_cast<cv::Mat*>(im)),
*ocr_result);
FD_C_DestroyOCRResultWrapper(fd_c_ocr_result_wrapper);
return new cv::Mat(result);
}
FD_C_Mat FD_C_VisSegmentation(FD_C_Mat im,
FD_C_SegmentationResult* fd_c_segmentation_result,
float weight) {
FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper =
FD_C_CreateSegmentationResultWrapperFromCResult(fd_c_segmentation_result);
auto& segmentation_result = CHECK_AND_CONVERT_FD_TYPE(
SegmentationResultWrapper, fd_c_segmentation_result_wrapper);
cv::Mat result = fastdeploy::vision::VisSegmentation(
*(reinterpret_cast<cv::Mat*>(im)), *segmentation_result, weight);
FD_C_DestroySegmentationResultWrapper(fd_c_segmentation_result_wrapper);
return new cv::Mat(result);
}

View File

@@ -89,6 +89,17 @@ FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat FD_C_VisClassificationWithLabel
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat FD_C_VisOcr(FD_C_Mat im, FD_C_OCRResult* ocr_result);
/** \brief Show the visualized results for segmentation models
*
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
* \param[in] result the result produced by model
* \param[in] weight transparent weight of visualized result image
* \return cv::Mat type stores the visualized results
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat FD_C_VisSegmentation(FD_C_Mat im,
FD_C_SegmentationResult* result,
float weight);
#ifdef __cplusplus
} // extern "C"

View File

@@ -1,7 +1,7 @@
# get RKNPU2_URL
set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(RKNPU2_VERSION "1.4.0")
set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz")
set(RKNPU2_VERSION "1.4.2b0")
set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz")
set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
# download_and_decompress
@@ -10,11 +10,12 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE}
# set path
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
# include lib
if (EXISTS ${RKNPU_RUNTIME_PATH})
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so)
include_directories(${RKNPU_RUNTIME_PATH}/include)
else ()
message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error")
message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.")
endif ()

View File

@@ -42,6 +42,7 @@
- [3. How to use the FastDeploy C++ SDK on Android](cn/faq/use_cpp_sdk_on_android.md)
- [4. Tips for using TensorRT](cn/faq/tensorrt_tricks.md)
- [5. How to add a new model](cn/faq/develop_a_new_model.md)
- [6. How to add a C API for a new model](cn/faq/develop_c_api_for_a_new_model.md)

## More FastDeploy Deployment Modules

View File

@@ -41,6 +41,7 @@
- [3. How to Use FastDeploy C++ SDK on Android Platform](en/faq/use_cpp_sdk_on_android.md)
- [4. Tricks of TensorRT](en/faq/tensorrt_tricks.md)
- [5. How to Develop a New Model](en/faq/develop_a_new_model.md)
- [6. How to Develop C API for a New Model](en/faq/develop_c_api_for_a_new_model.md)
## More FastDeploy Deployment Modules

View File

@@ -0,0 +1,143 @@
[English](../../en/faq/develop_c_api_for_a_new_model.md) | Chinese

# Adding C APIs for a New Model in FastDeploy

## Concepts

The core FastDeploy code base is implemented in C++. To make the interfaces more portable and to provide SDKs for several development languages, a set of C APIs is needed as a bridge between different programming languages.

Under FastDeploy's current implementation structure, adding APIs for a new model usually involves three parts:

- Model

  The model interface, which lets users create and load a model and run prediction.

- Result

  The result of model inference.

- Visualization

  Functions for visualizing the inference result.

For Model, the interfaces that need to be exposed to users are already implemented in C++; what remains is to wrap the C++ interfaces in a layer of C-style interfaces. For Result, the inference result has to be redefined with C structs. For the visualization functions, it is likewise enough to wrap the C++ interfaces with C-style ones.

We adopt a naming convention: every structure and function related to the C API is prefixed with FD_C. When a C++ class is wrapped in C, the structure is named FD_C_{class name}Wrapper. When a method of that C++ class needs to be callable, the function is named FD_C_{class name}Wrapper{method name}.

For example, the C++ class fastdeploy::RuntimeOption is wrapped in C as:
```c
struct FD_C_RuntimeOptionWrapper {
std::unique_ptr<fastdeploy::RuntimeOption> runtime_option;
};
```
As you can see, this structure actually holds C++ content, so in C only a pointer to FD_C_RuntimeOptionWrapper is used, and the actual C++ implementation functions are called through that pointer. For example, to call RuntimeOption::UseCpu() from C, the wrapper looks like:
```c
void FD_C_RuntimeOptionWrapperUseCpu(FD_C_RuntimeOptionWrapper* fd_c_runtimeoption_wrapper){
auto& runtime_option = fd_c_runtimeoption_wrapper->runtime_option;
runtime_option->UseCpu();
}
```
In this way, FD_C_RuntimeOptionWrapper is responsible for holding the actual C++ class, and FD_C_RuntimeOptionWrapper{method name} is responsible for calling that class's methods, so that users in C can access the classes and functions implemented in C++ through the C API.
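Seen from the C side, the user only ever handles the opaque wrapper pointer. Below is a minimal sketch of that calling pattern; FD_C_CreateRuntimeOptionWrapper and FD_C_DestroyRuntimeOptionWrapper are assumed names here, so verify them against c_api/fastdeploy_capi/runtime_option.h.
```c
#include "fastdeploy_capi/runtime_option.h"

int main() {
  /* The wrapper is opaque in C; only its pointer is passed around.
     The Create/Destroy names are assumptions -- check runtime_option.h. */
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseCpu(option);  /* forwards to RuntimeOption::UseCpu() */
  FD_C_DestroyRuntimeOptionWrapper(option);
  return 0;
}
```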
## Implementation Process

The following walks through adding a C API for the ppseg series models as an example of how to do this under the current framework.

1. Provide a data structure that represents segmentation results

Open fastdeploy/vision/common/result.h, which defines the prediction result structures for the different model categories. Find SegmentationResult and express the following data structure as a pure C struct:
```c++
struct FASTDEPLOY_DECL SegmentationResult : public BaseResult {
std::vector<uint8_t> label_map;
std::vector<float> score_map;
std::vector<int64_t> shape;
bool contain_score_map = false;
ResultType type = ResultType::SEGMENTATION;
}
```
Correspondingly, define a C struct FD_C_SegmentationResult to represent it:
```c
typedef struct FD_C_SegmentationResult {
FD_C_OneDimArrayUint8 label_map;
FD_C_OneDimArrayFloat score_map;
FD_C_OneDimArrayInt64 shape;
FD_C_Bool contain_score_map;
FD_C_ResultType type;
} FD_C_SegmentationResult;
```
For the representation of types such as FD_C_OneDimArrayUint8, refer to c_api/fastdeploy_capi/fd_type.h.
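These helpers all share the size-plus-buffer layout of the FD_C_OneDimSegmentationResult struct added in this commit. The following is only a sketch of what fd_type.h provides; the actual file is authoritative.
```c
#include <stddef.h>
#include <stdint.h>

/* Sketch of the common layout of the FD_C_OneDimArray* helpers;
   see c_api/fastdeploy_capi/fd_type.h for the real definitions. */
typedef struct FD_C_OneDimArrayUint8 {
  size_t size;    /* number of elements */
  uint8_t* data;  /* heap buffer owned by the result that holds it */
} FD_C_OneDimArrayUint8;

typedef struct FD_C_OneDimArrayFloat {
  size_t size;
  float* data;
} FD_C_OneDimArrayFloat;

typedef struct FD_C_OneDimArrayInt64 {
  size_t size;
  int64_t* data;
} FD_C_OneDimArrayInt64;
```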
Next, define two functions to convert between fastdeploy::SegmentationResult and FD_C_SegmentationResult. Since the C++ structure is held inside a corresponding Wrapper structure, what is actually defined is the conversion between FD_C_SegmentationResultWrapper and FD_C_SegmentationResult, i.e. the two functions below.
```c
FASTDEPLOY_CAPI_EXPORT extern FD_C_SegmentationResultWrapper*
FD_C_CreateSegmentationResultWrapperFromCResult(
FD_C_SegmentationResult* fd_c_segmentation_result);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_SegmentationResultWrapperToCResult(
FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper,
FD_C_SegmentationResult* fd_c_segmentation_result);
```
There are several other API functions for creating and destroying these structures; refer to the sample code and implement them in the same way.

The various Result types are implemented for the C API in c_api/fastdeploy_capi/vision/result.cc.

For declaring the various Wrapper structures, refer to c_api/fastdeploy_capi/types_internal.h.

2. Provide C APIs for the model interface

Open fastdeploy/vision/segmentation/ppseg/model.h, which defines the C++ interface of the segmentation model, i.e. the fastdeploy::vision::segmentation::PaddleSegModel class. Create a Wrapper in C to represent this class. To make it quick to define and implement other models of the same category later, c_api/fastdeploy_capi/types_internal.h defines macros that create a Wrapper and fetch the wrapped object back out of it. For example, the macro that defines the Wrapper for segmentation models is:
```c
#define DEFINE_SEGMENTATION_MODEL_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \
std::unique_ptr<fastdeploy::vision::segmentation::typename> varname; \
} FD_C_##typename##Wrapper
```
Following the structure of the macros already implemented in that file, the remaining macro definitions can be filled in.

With the structures declared, the interfaces are implemented under the directory of the concrete model category. Mirroring the C++ directory layout, create a directory c_api/fastdeploy_capi/vision/segmentation/ppseg to hold the segmentation model's C API, and create model.h and model.cc in it to declare and implement the model's C interface.

Checking against the methods exposed by the PaddleSegModel class, the following five interfaces currently need to be implemented (a usage sketch follows the code block):
```
// Create the model
FD_C_PaddleSegModelWrapper*
FD_C_CreatePaddleSegModelWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format);
// Destroy the model
void FD_C_DestroyPaddleSegModelWrapper(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper);
// Check whether initialization succeeded
FD_C_Bool FD_C_PaddleSegModelWrapperInitialized(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper);
// Predict a single image
FD_C_Bool FD_C_PaddleSegModelWrapperPredict(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper,
FD_C_Mat img, FD_C_SegmentationResult* fd_c_segmentation_result);
// Batch predict
FD_C_Bool FD_C_PaddleSegModelWrapperBatchPredict(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper,
FD_C_OneDimMat imgs,
FD_C_OneDimSegmentationResult* results);
```
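With these five functions in place, a C caller drives the model roughly as follows. This is a hedged sketch: FD_C_CreateRuntimeOptionWrapper, FD_C_DestroyRuntimeOptionWrapper, FD_C_Imread and the FD_C_ModelFormat_PADDLE enumerator are assumed names from the c_api headers, and the result struct is heap-allocated because the FD_C_DestroySegmentationResult shown in this commit frees the struct itself.
```c
#include <stdlib.h>
#include "fastdeploy_capi/vision.h"  /* umbrella header; adjust the include if needed */

int main() {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();  /* assumed name */
  FD_C_RuntimeOptionWrapperUseCpu(option);

  FD_C_PaddleSegModelWrapper* model = FD_C_CreatePaddleSegModelWrapper(
      "unet/model.pdmodel", "unet/model.pdiparams", "unet/deploy.yml",
      option, FD_C_ModelFormat_PADDLE /* assumed enumerator */);
  if (!FD_C_PaddleSegModelWrapperInitialized(model)) return -1;

  FD_C_Mat im = FD_C_Imread("test.jpg");  /* assumed helper from fd_type.h */
  /* calloc so the array members start as NULL; FD_C_DestroySegmentationResult
     frees the members and then the struct itself (see result.cc above). */
  FD_C_SegmentationResult* result =
      (FD_C_SegmentationResult*)calloc(1, sizeof(FD_C_SegmentationResult));
  if (FD_C_PaddleSegModelWrapperPredict(model, im, result)) {
    /* consume result->label_map / result->score_map / result->shape here */
  }

  FD_C_DestroySegmentationResult(result);
  FD_C_DestroyPaddleSegModelWrapper(model);
  FD_C_DestroyRuntimeOptionWrapper(option);  /* assumed name */
  return 0;
}
```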
3. Provide C APIs for the visualization functions

Open fastdeploy/vision/visualize/visualize.h, which contains the functions that visualize inference results for the different model categories. Wrap them in c_api/fastdeploy_capi/vision/visualize.h. For example, the C API needs to define and implement the following function for visualizing segmentation results:
```c
FD_C_Mat FD_C_VisSegmentation(FD_C_Mat im,
FD_C_SegmentationResult* result,
float weight)
```
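Continuing the prediction sketch from step 2, the C-side visualization might look like this; FD_C_Imwrite and FD_C_DestroyMat are assumed helpers from fd_type.h.
```c
/* im and result come from the prediction sketch in step 2. */
FD_C_Mat vis_im = FD_C_VisSegmentation(im, result, 0.5f);  /* weight = 0.5 */
FD_C_Imwrite("vis_result.jpg", vis_im);  /* assumed helper */
FD_C_DestroyMat(vis_im);                 /* assumed helper */
```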
4. Create an example to test the added C APIs

Under the examples directory, add a directory named c in the folder matching the model's category, create the C sample code and a CMakeLists.txt there, then build and run it to make sure the newly added C APIs work correctly.

View File

@@ -0,0 +1,152 @@
English | [Chinese](../../cn/faq/develop_c_api_for_a_new_model.md)
# Adding C APIs to Models
## Introduction
The core library of FastDeploy is implemented in C++. To improve the portability of the interface and provide SDKs for different development languages, a set of C APIs is needed to serve as a bridge between programming languages.
Under FastDeploy's current implementation structure, adding C APIs for a model usually involves the following three parts:
- Model
The model interface, which provides users with functions for creating and loading a model, and for prediction.
- Result
The result of model inference
- Visualization
Function for visualizing inference results
For the Model part, the interfaces that need to be exposed to users are already implemented in C++; what remains is to wrap the C++ interfaces in C-style interfaces backed by another structure. For the Result part, C structures are used to define the inference results. For the visualization functions, you only need to wrap the C++ interfaces with C-style interfaces.
We adopt a naming convention: all structures and functions related to providing C APIs are prefixed with FD_C. When wrapping a C++ class in C, use FD_C_{class name}Wrapper as the structure name. If you need a C interface that calls a method of a C++ class, name it FD_C_{class name}Wrapper{method name}. For example, the fastdeploy::RuntimeOption class in C++ is wrapped in C as follows:
```c
struct FD_C_RuntimeOptionWrapper {
std::unique_ptr<fastdeploy::RuntimeOption> runtime_option;
};
```
As you can see, this structure holds C++ data, so in C only pointers to FD_C_RuntimeOptionWrapper are used; the actual C++ implementation is invoked through that pointer. For example, to call the RuntimeOption::UseCpu() function from C:
```c
void FD_C_RuntimeOptionWrapperUseCpu(FD_C_RuntimeOptionWrapper* fd_c_runtimeoption_wrapper){
auto& runtime_option = fd_c_runtimeoption_wrapper->runtime_option;
runtime_option->UseCpu();
}
```
In this way, FD_C_RuntimeOptionWrapper is responsible for holding the actual C++ object, and the FD_C_RuntimeOptionWrapper{method name} functions are responsible for calling the methods of the C++ class, letting users reach classes and functions implemented in C++ through the C API.
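From the C side, a caller never looks inside the wrapper; it only holds the pointer and hands it to FD_C_-prefixed functions. A minimal sketch (all three functions appear in the examples later in this change):
```c
// Create the option, configure it through the C API, then release it.
FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
FD_C_RuntimeOptionWrapperUseCpu(option);
FD_C_DestroyRuntimeOptionWrapper(option);
```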
## Implementation process
The following uses the ppseg series models as an example to describe how to implement a C API under the current framework.
1. Provide a data structure that represents segmentation results
Open the file fastdeploy/vision/common/result.h, which defines the data structures for the prediction results of different model types. Find SegmentationResult, and represent the following data structure with a pure C structure:
```c++
struct FASTDEPLOY_DECL SegmentationResult : public BaseResult {
std::vector<uint8_t> label_map;
std::vector<float> score_map;
std::vector<int64_t> shape;
bool contain_score_map = false;
ResultType type = ResultType::SEGMENTATION;
};
```
Correspondingly, define a C structure FD_C_SegmentationResult to represent it:
```c
typedef struct FD_C_SegmentationResult {
FD_C_OneDimArrayUint8 label_map;
FD_C_OneDimArrayFloat score_map;
FD_C_OneDimArrayInt64 shape;
FD_C_Bool contain_score_map;
FD_C_ResultType type;
} FD_C_SegmentationResult;
```
For the representation of types such as FD_C_OneDimArrayUint8, refer to the file c_api/fastdeploy_capi/fd_type.h.
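For reference, each of these one-dimensional array types is essentially a length-plus-buffer pair; a sketch consistent with fd_type.h (field names assumed):
```c
typedef struct FD_C_OneDimArrayUint8 {
  size_t size;    // number of elements
  uint8_t* data;  // heap-allocated buffer owned by the result
} FD_C_OneDimArrayUint8;
```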
Then define two functions that convert between fastdeploy::SegmentationResult and FD_C_SegmentationResult. Since each C++ structure is wrapped by a corresponding Wrapper structure in C, what is actually defined is the conversion between FD_C_SegmentationResultWrapper and FD_C_SegmentationResult, i.e. the following two functions:
```c
FASTDEPLOY_CAPI_EXPORT extern FD_C_SegmentationResultWrapper*
FD_C_CreateSegmentationResultWrapperFromCResult(
FD_C_SegmentationResult* fd_c_segmentation_result);
FASTDEPLOY_CAPI_EXPORT extern void
FD_C_SegmentationResultWrapperToCResult(
FD_C_SegmentationResultWrapper* fd_c_segmentation_result_wrapper,
FD_C_SegmentationResult* fd_c_segmentation_result);
```
Several other API functions that create and destroy the structures can be implemented by referring to the sample code.
The various Result types of the C API are implemented in c_api/fastdeploy_capi/vision/result.cc.
For declaring the various Wrapper structures, refer to the file c_api/fastdeploy_capi/types_internal.h.
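Besides the two conversion functions, each result type also needs plain create/destroy helpers so that C callers can manage its lifetime. A sketch of the declarations (FD_C_DestroySegmentationResult is used by the example later in this change; the create counterpart is assumed by analogy):
```c
FASTDEPLOY_CAPI_EXPORT extern FD_C_SegmentationResult*
FD_C_CreateSegmentationResult();

FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroySegmentationResult(
    FD_C_SegmentationResult* fd_c_segmentation_result);
```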
2. Provide a C API for the model interface
Open the file fastdeploy/vision/segmentation/ppseg/model.h, which defines the C++ interface of the segmentation model, i.e. the fastdeploy::vision::segmentation::PaddleSegModel class. Create a Wrapper in C to represent this class. To make it easy to define and implement models of the same category quickly later on, c_api/fastdeploy_capi/types_internal.h defines macros that create a Wrapper and extract the wrapped object from a Wrapper. For example, the macro that creates the Wrapper for segmentation models is defined as:
```c
#define DEFINE_SEGMENTATION_MODEL_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \
std::unique_ptr<fastdeploy::vision::segmentation::typename> varname; \
} FD_C_##typename##Wrapper
```
You can add the remaining macro definitions by following the structure of the macros already implemented in that file.
After declaring the structure, implement the interfaces under the directory of the specific model category. Mirroring the C++ directory structure, create a directory c_api/fastdeploy_capi/vision/segmentation/ppseg to hold the segmentation C API, and create the files model.h and model.cc to declare and implement the model's C interfaces.
Judging from the methods exposed by the PaddleSegModel class, the following five interfaces currently need to be implemented (a usage sketch follows the listing):
```c
// Create model
FD_C_PaddleSegModelWrapper*
FD_C_CreatePaddleSegModelWrapper(
const char* model_file, const char* params_file, const char* config_file,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format);
// Destroy model
void FD_C_DestroyPaddleSegModelWrapper(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper);
// Check whether initialization succeeded
FD_C_Bool FD_C_PaddleSegModelWrapperInitialized(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper);
// Predict a single image
FD_C_Bool FD_C_PaddleSegModelWrapperPredict(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper,
FD_C_Mat img, FD_C_SegmentationResult* fd_c_segmentation_result);
// Batch prediction
FD_C_Bool FD_C_PaddleSegModelWrapperBatchPredict(
FD_C_PaddleSegModelWrapper* fd_c_paddleseg_model_wrapper,
FD_C_OneDimMat imgs,
FD_C_OneDimSegmentationResult* results);
```
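Put together, a caller drives these five interfaces in a create → check → predict → destroy sequence. A minimal sketch mirroring the C example added later in this change (model and image paths are placeholders):
```c
#include <stdio.h>
#include <stdlib.h>
#include "fastdeploy_capi/vision.h"

int main() {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseCpu(option);
  FD_C_PaddleSegModelWrapper* model = FD_C_CreatePaddleSegModelWrapper(
      "model.pdmodel", "model.pdiparams", "deploy.yaml", option, PADDLE);
  if (!FD_C_PaddleSegModelWrapperInitialized(model)) {
    printf("Failed to initialize.\n");
    FD_C_DestroyRuntimeOptionWrapper(option);
    FD_C_DestroyPaddleSegModelWrapper(model);
    return -1;
  }
  FD_C_Mat im = FD_C_Imread("test.jpg");
  FD_C_SegmentationResult* result =
      (FD_C_SegmentationResult*)malloc(sizeof(FD_C_SegmentationResult));
  if (FD_C_PaddleSegModelWrapperPredict(model, im, result)) {
    char res[2000];
    FD_C_SegmentationResultStr(result, res);  // fill res with a summary
    printf("%s", res);
  }
  FD_C_DestroyRuntimeOptionWrapper(option);
  FD_C_DestroyPaddleSegModelWrapper(model);
  FD_C_DestroySegmentationResult(result);
  FD_C_DestroyMat(im);
  return 0;
}
```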
3. Provide a C API for the visualization function
Open the file fastdeploy/vision/visualize/visualize.h, which contains functions for visualizing the inference results of different model types. Wrap them in c_api/fastdeploy_capi/vision/visualize.h. For example, the C API needs to define and implement the following function for visualizing segmentation results:
```c
FD_C_Mat FD_C_VisSegmentation(FD_C_Mat im,
FD_C_SegmentationResult* result,
float weight);
```
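In use, the function takes the original image and the filled result and returns a new FD_C_Mat; a short fragment mirroring the example later in this change:
```c
// Blend the segmentation mask over the input image and save it.
FD_C_Mat vis_im = FD_C_VisSegmentation(im, result, 0.5);
FD_C_Imwrite("vis_result.jpg", vis_im);
FD_C_DestroyMat(vis_im);
```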
4. Create an example to test the added C API
Under the examples directory, add a directory named c in the folder matching the model's category, create the C sample code and a CMakeLists.txt inside, then compile and test to make sure the new C API works correctly.

View File

@@ -150,31 +150,16 @@ FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
#### Result
```c
FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData(
FD_C_ClassifyResult* fd_c_classify_result)
void FD_C_ClassifyResultStr(
FD_C_ClassifyResult* fd_c_classify_result,
char* str_buffer);
```
>
> Create a pointer to FD_C_ClassifyResultWrapper structure, which contains `fastdeploy::vision::ClassifyResult` object in C++. You can call methods in C++ ClassifyResult object by C API with this pointer.
> print result
>
> **Params**
> * **fd_c_classify_result**(FD_C_ClassifyResult*): pointer to FD_C_ClassifyResult structure
>
> **Return**
> * **fd_c_classify_result_wrapper**(FD_C_ClassifyResultWrapper*): pointer to FD_C_ClassifyResultWrapper structure
```c
char* FD_C_ClassifyResultWrapperStr(
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
```
>
> Call Str() methods in `fastdeploy::vision::ClassifyResult` object contained in FD_C_ClassifyResultWrapper structureand return a string to describe information in result.
>
> **Params**
> * **fd_c_classify_result_wrapper**(FD_C_ClassifyResultWrapper*): pointer to FD_C_ClassifyResultWrapper structure
>
> **Return**
> * **str**(char*): a string to describe information in result
> * **str_buffer**(char*): used to store result string
- [Model Description](../../)

View File

@@ -153,32 +153,15 @@ FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
#### Predict Result
```c
FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData(
FD_C_ClassifyResult* fd_c_classify_result)
void FD_C_ClassifyResultStr(
FD_C_ClassifyResult* fd_c_classify_result, char* str_buffer);
```
>
> Create a pointer to an FD_C_ClassifyResultWrapper object; FD_C_ClassifyResultWrapper contains the C++ `fastdeploy::vision::ClassifyResult` object, and through this pointer the corresponding C++ functions can be called via the C API.
>
> Print the result
>
> **Params**
> * **fd_c_classify_result**(FD_C_ClassifyResult*): pointer to an FD_C_ClassifyResult object
>
> **Return**
> * **fd_c_classify_result_wrapper**(FD_C_ClassifyResultWrapper*): pointer to an FD_C_ClassifyResultWrapper object
```c
char* FD_C_ClassifyResultWrapperStr(
FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper);
```
>
> Call the Str() method of the `fastdeploy::vision::ClassifyResult` object contained in FD_C_ClassifyResultWrapper, and return a string describing the information in the result.
>
> **Params**
> * **fd_c_classify_result_wrapper**(FD_C_ClassifyResultWrapper*): pointer to an FD_C_ClassifyResultWrapper object
>
> **Return**
> * **str**(char*): a string describing the information in the result
> * **str_buffer**(char*): buffer that stores the result string

View File

@@ -62,19 +62,11 @@ void CpuInfer(const char* model_dir, const char* image_file) {
}
// print res
// You can directly access fields in FD_C_ClassifyResult and print it refer to
// ClassifyResult API Doc Or you can wrap it using
// FD_C_ClassifyResult_Wrapper, which containes C++ structure
// fastdeploy::vision::ClassifyResult, and using C API
// FD_C_ClassifyResultWrapperStr to call
// fastdeploy::vision::ClassifyResult::Str() in it. For convenience, we choose
// this method to print it.
FD_C_ClassifyResultWrapper* result_wrapper =
FD_C_CreateClassifyResultWrapperFromData(result);
printf("%s", FD_C_ClassifyResultWrapperStr(result_wrapper));
char res[2000];
FD_C_ClassifyResultStr(result, res);
printf("%s", res);
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleClasModelWrapper(model);
FD_C_DestroyClassifyResultWrapper(result_wrapper);
FD_C_DestroyClassifyResult(result);
FD_C_DestroyMat(im);
}
@@ -118,19 +110,11 @@ void GpuInfer(const char* model_dir, const char* image_file) {
}
// print res
// You can directly access fields in FD_C_ClassifyResult and print it refer to
// ClassifyResult API Doc Or you can wrap it using
// FD_C_ClassifyResult_Wrapper, which containes C++ structure
// fastdeploy::vision::ClassifyResult, and using C API
// FD_C_ClassifyResultWrapperStr to call
// fastdeploy::vision::ClassifyResult::Str() in it. For convenience, we choose
// this method to print it.
FD_C_ClassifyResultWrapper* result_wrapper =
FD_C_CreateClassifyResultWrapperFromData(result);
printf("%s", FD_C_ClassifyResultWrapperStr(result_wrapper));
char res[2000];
FD_C_ClassifyResultStr(result, res);
printf("%s", res);
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleClasModelWrapper(model);
FD_C_DestroyClassifyResultWrapper(result_wrapper);
FD_C_DestroyClassifyResult(result);
FD_C_DestroyMat(im);
}

View File

@@ -112,16 +112,9 @@ void CpuInfer(const char* det_model_dir, const char* cls_model_dir,
}
// print res
// You can directly access fields in FD_C_OCRResult and print it refer to
// OCRResult API Doc Or you can wrap it using
// FD_C_OCRResult_Wrapper, which containes C++ structure
// fastdeploy::vision::OCRResult, and using C API
// FD_C_OCRResultWrapperStr to call
// fastdeploy::vision::OCRResult::Str() in it. For convenience, we choose
// this method to print it.
FD_C_OCRResultWrapper* result_wrapper =
FD_C_CreateOCRResultWrapperFromData(result);
printf("%s", FD_C_OCRResultWrapperStr(result_wrapper));
char res[2000];
FD_C_OCRResultStr(result, res);
printf("%s", res);
FD_C_Mat vis_im = FD_C_VisOcr(im, result);
FD_C_Imwrite("vis_result.jpg", vis_im);
printf("Visualized result saved in ./vis_result.jpg\n");
@@ -133,9 +126,9 @@ void CpuInfer(const char* det_model_dir, const char* cls_model_dir,
FD_C_DestroyDBDetectorWrapper(det_model);
FD_C_DestroyRecognizerWrapper(rec_model);
FD_C_DestroyPPOCRv2Wrapper(ppocr_v2);
FD_C_DestroyOCRResultWrapper(result_wrapper);
FD_C_DestroyOCRResult(result);
FD_C_DestroyMat(im);
FD_C_DestroyMat(vis_im);
}
void GpuInfer(const char* det_model_dir, const char* cls_model_dir,
@@ -213,16 +206,9 @@ void GpuInfer(const char* det_model_dir, const char* cls_model_dir,
}
// print res
// You can directly access fields in FD_C_OCRResult and print it refer to
// OCRResult API Doc Or you can wrap it using
// FD_C_OCRResult_Wrapper, which containes C++ structure
// fastdeploy::vision::OCRResult, and using C API
// FD_C_OCRResultWrapperStr to call
// fastdeploy::vision::OCRResult::Str() in it. For convenience, we choose
// this method to print it.
FD_C_OCRResultWrapper* result_wrapper =
FD_C_CreateOCRResultWrapperFromData(result);
printf("%s", FD_C_OCRResultWrapperStr(result_wrapper));
char res[2000];
FD_C_OCRResultStr(result, res);
printf("%s", res);
FD_C_Mat vis_im = FD_C_VisOcr(im, result);
FD_C_Imwrite("vis_result.jpg", vis_im);
printf("Visualized result saved in ./vis_result.jpg\n");
@@ -234,9 +220,9 @@ void GpuInfer(const char* det_model_dir, const char* cls_model_dir,
FD_C_DestroyDBDetectorWrapper(det_model);
FD_C_DestroyRecognizerWrapper(rec_model);
FD_C_DestroyPPOCRv2Wrapper(ppocr_v2);
FD_C_DestroyOCRResultWrapper(result_wrapper);
FD_C_DestroyOCRResult(result);
FD_C_DestroyMat(im);
FD_C_DestroyMat(vis_im);
}
int main(int argc, char* argv[]) {
if (argc < 7) {

View File

@@ -112,16 +112,9 @@ void CpuInfer(const char* det_model_dir, const char* cls_model_dir,
}
// print res
// You can directly access fields in FD_C_OCRResult and print it refer to
// OCRResult API Doc Or you can wrap it using
// FD_C_OCRResult_Wrapper, which containes C++ structure
// fastdeploy::vision::OCRResult, and using C API
// FD_C_OCRResultWrapperStr to call
// fastdeploy::vision::OCRResult::Str() in it. For convenience, we choose
// this method to print it.
FD_C_OCRResultWrapper* result_wrapper =
FD_C_CreateOCRResultWrapperFromData(result);
printf("%s", FD_C_OCRResultWrapperStr(result_wrapper));
char res[2000];
FD_C_OCRResultStr(result, res);
printf("%s", res);
FD_C_Mat vis_im = FD_C_VisOcr(im, result);
FD_C_Imwrite("vis_result.jpg", vis_im);
printf("Visualized result saved in ./vis_result.jpg\n");
@@ -133,9 +126,9 @@ void CpuInfer(const char* det_model_dir, const char* cls_model_dir,
FD_C_DestroyDBDetectorWrapper(det_model);
FD_C_DestroyRecognizerWrapper(rec_model);
FD_C_DestroyPPOCRv3Wrapper(ppocr_v3);
FD_C_DestroyOCRResultWrapper(result_wrapper);
FD_C_DestroyOCRResult(result);
FD_C_DestroyMat(im);
FD_C_DestroyMat(vis_im);
}
void GpuInfer(const char* det_model_dir, const char* cls_model_dir,
@@ -213,16 +206,9 @@ void GpuInfer(const char* det_model_dir, const char* cls_model_dir,
}
// print res
// You can directly access fields in FD_C_OCRResult and print it refer to
// OCRResult API Doc Or you can wrap it using
// FD_C_OCRResult_Wrapper, which containes C++ structure
// fastdeploy::vision::OCRResult, and using C API
// FD_C_OCRResultWrapperStr to call
// fastdeploy::vision::OCRResult::Str() in it. For convenience, we choose
// this method to print it.
FD_C_OCRResultWrapper* result_wrapper =
FD_C_CreateOCRResultWrapperFromData(result);
printf("%s", FD_C_OCRResultWrapperStr(result_wrapper));
char res[2000];
FD_C_OCRResultStr(result, res);
printf("%s", res);
FD_C_Mat vis_im = FD_C_VisOcr(im, result);
FD_C_Imwrite("vis_result.jpg", vis_im);
printf("Visualized result saved in ./vis_result.jpg\n");
@@ -234,9 +220,9 @@ void GpuInfer(const char* det_model_dir, const char* cls_model_dir,
FD_C_DestroyDBDetectorWrapper(det_model);
FD_C_DestroyRecognizerWrapper(rec_model);
FD_C_DestroyPPOCRv3Wrapper(ppocr_v3);
FD_C_DestroyOCRResultWrapper(result_wrapper);
FD_C_DestroyOCRResult(result);
FD_C_DestroyMat(im);
FD_C_DestroyMat(vis_im);
}
int main(int argc, char* argv[]) {
if (argc < 7) {
@@ -245,7 +231,7 @@ int main(int argc, char* argv[]) {
"path/to/rec_model path/to/rec_label_file path/to/image "
"run_option, "
"e.g ./infer_demo ./ch_PP-OCRv3_det_infer "
"./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
"./ch_ppocr_mobile_v3.0_cls_infer ./ch_PP-OCRv3_rec_infer "
"./ppocr_keys_v1.txt ./12.jpg 0\n");
printf(
"The data type of run_option is int, 0: run with cpu; 1: run with gpu"

View File

@@ -0,0 +1,77 @@
# PaddleOCR Model Deployment
## PaddleOCR is a multi-model pipeline task consisting of the following models:
* Text detection: `DBDetector`
* [Optional] Orientation classification: `Classifier`, used to correct the image orientation before text recognition
* Text recognition: `Recognizer`, used to recognize text from the image
For different scenarios, FastDeploy provides the OCR task deployments listed below. To complete the whole OCR prediction pipeline, users need to download 3 models plus the dictionary file (or 2, since the classifier is optional).
## PP-OCR Chinese/English Series Models
The model download links in the table below are provided by the PaddleOCR model zoo; see the [PP-OCR series model list](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md) for details.
| OCR Version | Text Box Detection | Orientation Classification Model | Text Recognition | Dictionary File | Notes |
|:-------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:--------------------------------------------------------|
| ch_PP-OCRv3 [Recommended] | [ch_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | Original ultra-lightweight OCRv3 series model; supports Chinese, English and multilingual text detection |
| en_PP-OCRv3 [Recommended] | [en_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [en_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) | [en_dict.txt](https://bj.bcebos.com/paddlehub/fastdeploy/en_dict.txt) | Original ultra-lightweight OCRv3 series model; supports English and digit recognition; apart from the training data of the detection and recognition models, there is no other difference from the Chinese models |
| ch_PP-OCRv2 | [ch_PP-OCRv2_det](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv2_rec](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | Original ultra-lightweight OCRv2 series model; supports Chinese, English and multilingual text detection |
| ch_PP-OCRv2_mobile | [ch_ppocr_mobile_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_mobile_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | Original ultra-lightweight OCRv2 series model; supports Chinese, English and multilingual text detection, and is even lighter than PP-OCRv2 |
| ch_PP-OCRv2_server | [ch_ppocr_server_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_server_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2 server series model; supports Chinese, English and multilingual text detection; larger than the ultra-lightweight models but more accurate |
## Model Conversion
To use PP-OCR on RKNPU2, the Paddle static graph models must be converted to RKNN models.
### Converting Static Graph Models to RKNN Format
The rknn_toolkit2 tool does not yet support converting Paddle static graph models to RKNN models directly, so the Paddle static graph models must first be converted to ONNX.
```bash
# Download the models and the dictionary file
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
tar -xvf ch_PP-OCRv3_det_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar -xvf ch_PP-OCRv3_rec_infer.tar
# Convert the models to ONNX format
paddle2onnx --model_dir ch_PP-OCRv3_det_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--enable_dev_version True
paddle2onnx --model_dir ch_ppocr_mobile_v2.0_cls_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--enable_dev_version True
paddle2onnx --model_dir ch_PP-OCRv3_rec_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--enable_dev_version True
# Fix the input shape of the models
python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--output_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--input_shape_dict "{'x':[1,3,960,960]}"
python -m paddle2onnx.optimize --input_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--output_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--input_shape_dict "{'x':[1,3,48,192]}"
python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--output_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--input_shape_dict "{'x':[1,3,48,320]}"
# Convert the ONNX models to RKNN models
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_det.yaml \
--target_platform rk3588
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_rec.yaml \
--target_platform rk3588
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_cls.yaml \
--target_platform rk3588
```

View File

@@ -0,0 +1,14 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Specify the path of the downloaded and extracted fastdeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Add FastDeploy header dependencies
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_static_shape_demo ${PROJECT_SOURCE_DIR}/infer_static_shape.cc)
# Link against the FastDeploy libraries
target_link_libraries(infer_static_shape_demo ${FASTDEPLOY_LIBS})

View File

@@ -0,0 +1,55 @@
English | [Simplified Chinese](README_CN.md)
# PPOCRv3 C++ Deployment Example
This directory provides `infer_static_shape.cc`, an example that quickly finishes the deployment of PPOCRv3 on CPU and RKNPU.
Two steps before deployment
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
Taking the CPU inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model.
```
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download model, image, and dictionary files
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# CPU inference
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
./ppocr_keys_v1.txt \
./12.jpg \
0
# RKNPU inference
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
./ppocr_keys_v1.txt \
./12.jpg \
1
```
The above command works for Linux or MacOS. For SDK in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
## Other Documents
- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
- [PPOCR Model Description](../../)
- [PPOCRv3 Python Deployment](../python)
- [Model Prediction Results](../../../../../../docs/api/vision_results/)
- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,63 @@
[English](README.md) | Simplified Chinese
# PPOCRv3 C++ Deployment Example
This directory provides `infer_static_shape.cc`, an example that quickly finishes the deployment of PPOCRv3 on CPU and RKNPU.
Before deployment, confirm that you have completed the following two steps:
* [Build the FastDeploy SDK correctly](../../../../../../docs/cn/faq/rknpu2/build.md).
* [Convert the model successfully](../README.md).
Run the following commands in this directory to compile and test. To support this model, FastDeploy version 1.0.3 or above (x.x.x>1.0.3) and RKNN version 1.4.1b22 or above are required.
```
mkdir build
cd build
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the image and the dictionary file
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# Copy the RKNN models to the build directory
# CPU inference
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
./ppocr_keys_v1.txt \
./12.jpg \
0
# RKNPU inference
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
./ppocr_keys_v1.txt \
./12.jpg \
1
```
The visualized result after running is as follows:
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
The output is as follows:
```text
det boxes: [[276,174],[285,173],[285,178],[276,179]]rec text: rec score:0.000000 cls label: 1 cls score: 0.766602
det boxes: [[43,408],[483,390],[483,431],[44,449]]rec text: 上海斯格威铂尔曼大酒店 rec score:0.888450 cls label: 0 cls score: 1.000000
det boxes: [[186,456],[399,448],[399,480],[186,488]]rec text: 打浦路15号 rec score:0.988769 cls label: 0 cls score: 1.000000
det boxes: [[18,501],[513,485],[514,537],[18,554]]rec text: 绿洲仕格维花园公寓 rec score:0.992730 cls label: 0 cls score: 1.000000
det boxes: [[78,553],[404,541],[404,573],[78,585]]rec text: 打浦路252935号 rec score:0.983545 cls label: 0 cls score: 1.000000
Visualized result saved in ./vis_result.jpg
```
## Other Documents
- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
- [PPOCR Model Description](../../../README_CN.md)
- [PPOCRv3 Python Deployment](../python)
- [Model Prediction Results](../../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,126 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
void InitAndInfer(const std::string &det_model_file,
const std::string &cls_model_file,
const std::string &rec_model_file,
const std::string &rec_label_file,
const std::string &image_file,
const fastdeploy::RuntimeOption &option,
const fastdeploy::ModelFormat &format) {
auto det_params_file = "";
auto cls_params_file = "";
auto rec_params_file = "";
auto det_option = option;
auto cls_option = option;
auto rec_option = option;
if (format == fastdeploy::ONNX) {
std::cout << "ONNX Model" << std::endl;
}
auto det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option, format);
auto cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option, format);
auto rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label_file, rec_option, format);
if (format == fastdeploy::RKNN) {
cls_model.GetPreprocessor().DisableNormalize();
cls_model.GetPreprocessor().DisablePermute();
det_model.GetPreprocessor().DisableNormalize();
det_model.GetPreprocessor().DisablePermute();
rec_model.GetPreprocessor().DisableNormalize();
rec_model.GetPreprocessor().DisablePermute();
}
det_model.GetPreprocessor().SetStaticShapeInfer(true);
rec_model.GetPreprocessor().SetStaticShapeInfer(true);
assert(det_model.Initialized());
assert(cls_model.Initialized());
assert(rec_model.Initialized());
// The classification model is optional, so the PP-OCR can also be connected
// in series as follows: auto ppocr_v3 =
// fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
auto ppocr_v3 =
fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
// When users enable static shape infer for rec model, the batch size of cls
// and rec model must be set to 1.
ppocr_v3.SetClsBatchSize(1);
ppocr_v3.SetRecBatchSize(1);
if (!ppocr_v3.Initialized()) {
std::cerr << "Failed to initialize PP-OCR." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::OCRResult result;
if (!ppocr_v3.Predict(im, &result)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << result.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisOcr(im, result);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
int main(int argc, char *argv[]) {
if (argc < 7) {
std::cout << "Usage: infer_demo path/to/det_model path/to/cls_model "
"path/to/rec_model path/to/rec_label_file path/to/image "
"run_option, "
"e.g ./infer_demo ./ch_PP-OCRv3_det_infer "
"./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
"./ppocr_keys_v1.txt ./12.jpg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with ascend."
<< std::endl;
return -1;
}
fastdeploy::RuntimeOption option;
fastdeploy::ModelFormat format;
int flag = std::atoi(argv[6]);
if (flag == 0) {
option.UseCpu();
format = fastdeploy::ONNX;
} else if (flag == 1) {
option.UseRKNPU2();
format = fastdeploy::RKNN;
}
std::string det_model_dir = argv[1];
std::string cls_model_dir = argv[2];
std::string rec_model_dir = argv[3];
std::string rec_label_file = argv[4];
std::string test_image = argv[5];
InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file,
test_image, option, format);
return 0;
}

View File

@@ -0,0 +1,49 @@
English | [简体中文](README_CN.md)
# PPOCRv3 Python Deployment Example
Two steps before deployment
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
This directory provides `infer_static_shape.py`, an example that quickly finishes the deployment of PPOCRv3 on CPU and RKNPU. The script is as follows
```
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/ocr/PP-OCRv3/python/
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--rec_label_file ./ppocr_keys_v1.txt \
--image 12.jpg \
--device cpu
# NPU inference
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
--rec_label_file ppocr_keys_v1.txt \
--image 12.jpg \
--device npu
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
## Other Documents
- [Python API reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
- [PPOCR Model Description](../../)
- [PPOCRv3 C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../../docs/api/vision_results/)
- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,62 @@
[English](README.md) | Simplified Chinese
# PPOCRv3 Python Deployment Example
Before deployment, confirm the following two steps
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. The FastDeploy Python whl package is installed. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
This directory provides `infer_static_shape.py`, an example that quickly finishes the deployment of PPOCRv3 on CPU and RKNPU. Run the following script to finish the deployment
```
# Download the models, the image and the dictionary file
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
tar xvf ch_PP-OCRv3_det_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar xvf ch_PP-OCRv3_rec_infer.tar
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/ocr/PP-OCRv3/python/
# CPU inference
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--rec_label_file ./ppocr_keys_v1.txt \
--image 12.jpg \
--device cpu
# NPU inference
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
--rec_label_file ppocr_keys_v1.txt \
--image 12.jpg \
--device npu
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
## Other Documents
- [Python API Reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
- [PPOCR Model Description](../../)
- [PPOCRv3 C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../../docs/api/vision_results/)
- [How to switch the model inference backend engine](../../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,144 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import cv2
import os
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--det_model", required=True, help="Path of Detection model of PPOCR.")
parser.add_argument(
"--cls_model",
required=True,
help="Path of Classification model of PPOCR.")
parser.add_argument(
"--rec_model",
required=True,
help="Path of Recognization model of PPOCR.")
parser.add_argument(
"--rec_label_file",
required=True,
help="Path of Recognization model of PPOCR.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.")
parser.add_argument(
"--cpu_thread_num",
type=int,
default=9,
help="Number of threads while inference on CPU.")
return parser.parse_args()
def build_option(args):
det_option = fd.RuntimeOption()
cls_option = fd.RuntimeOption()
rec_option = fd.RuntimeOption()
if args.device == "npu":
det_option.use_rknpu2()
cls_option.use_rknpu2()
rec_option.use_rknpu2()
return det_option, cls_option, rec_option
def build_format(args):
det_format = fd.ModelFormat.ONNX
cls_format = fd.ModelFormat.ONNX
rec_format = fd.ModelFormat.ONNX
if args.device == "npu":
det_format = fd.ModelFormat.RKNN
cls_format = fd.ModelFormat.RKNN
rec_format = fd.ModelFormat.RKNN
return det_format, cls_format, rec_format
args = parse_arguments()
# Detection model, detects the text boxes
det_model_file = args.det_model
det_params_file = ""
# Classification model, orientation classification, optional
cls_model_file = args.cls_model
cls_params_file = ""
# Recognition model, recognizes text
rec_model_file = args.rec_model
rec_params_file = ""
rec_label_file = args.rec_label_file
det_option, cls_option, rec_option = build_option(args)
det_format, cls_format, rec_format = build_format(args)
det_model = fd.vision.ocr.DBDetector(
det_model_file,
det_params_file,
runtime_option=det_option,
model_format=det_format)
cls_model = fd.vision.ocr.Classifier(
cls_model_file,
cls_params_file,
runtime_option=cls_option,
model_format=cls_format)
rec_model = fd.vision.ocr.Recognizer(
rec_model_file,
rec_params_file,
rec_label_file,
runtime_option=rec_option,
model_format=rec_format)
# Enable static shape inference for the Det and Rec models
det_model.preprocessor.static_shape_infer = True
rec_model.preprocessor.static_shape_infer = True
if args.device == "npu":
det_model.preprocessor.disable_normalize()
det_model.preprocessor.disable_permute()
cls_model.preprocessor.disable_normalize()
cls_model.preprocessor.disable_permute()
rec_model.preprocessor.disable_normalize()
rec_model.preprocessor.disable_permute()
# Create PP-OCR, chaining the 3 models; cls_model is optional and can be set to None if not needed
ppocr_v3 = fd.vision.ocr.PPOCRv3(
det_model=det_model, cls_model=cls_model, rec_model=rec_model)
# The batch size of the Cls and Rec models must be set to 1 to enable static shape inference
ppocr_v3.cls_batch_size = 1
ppocr_v3.rec_batch_size = 1
# Prepare the image for prediction
im = cv2.imread(args.image)
# Predict and print the results
result = ppocr_v3.predict(im)
print(result)
# Visualize the results
vis_im = fd.vision.vis_ppocr(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")

View File

@@ -0,0 +1,13 @@
PROJECT(infer_demo C)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Specify the path of the downloaded and extracted fastdeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Add FastDeploy header dependencies
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.c)
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})

View File

@@ -0,0 +1,148 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include "fastdeploy_capi/vision.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
void CpuInfer(const char* model_dir, const char* image_file) {
char model_file[100];
char params_file[100];
char config_file[100];
int max_size = 99;
snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "deploy.yaml");
FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
FD_C_RuntimeOptionWrapperUseCpu(option);
FD_C_PaddleSegModelWrapper* model = FD_C_CreatePaddleSegModelWrapper(
model_file, params_file, config_file, option, PADDLE);
if (!FD_C_PaddleSegModelWrapperInitialized(model)) {
printf("Failed to initialize.\n");
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleSegModelWrapper(model);
return;
}
FD_C_Mat im = FD_C_Imread(image_file);
FD_C_SegmentationResult* result =
(FD_C_SegmentationResult*)malloc(sizeof(FD_C_SegmentationResult));
if (!FD_C_PaddleSegModelWrapperPredict(model, im, result)) {
printf("Failed to predict.\n");
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleSegModelWrapper(model);
FD_C_DestroyMat(im);
free(result);
return;
}
// print res
char res[2000];
FD_C_SegmentationResultStr(result, res);
printf("%s", res);
FD_C_Mat vis_im = FD_C_VisSegmentation(im, result, 0.5);
FD_C_Imwrite("vis_result.jpg", vis_im);
printf("Visualized result saved in ./vis_result.jpg\n");
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleSegModelWrapper(model);
FD_C_DestroySegmentationResult(result);
FD_C_DestroyMat(im);
FD_C_DestroyMat(vis_im);
}
void GpuInfer(const char* model_dir, const char* image_file) {
char model_file[100];
char params_file[100];
char config_file[100];
int max_size = 99;
snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "deploy.yaml");
FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
FD_C_RuntimeOptionWrapperUseGpu(option, 0);
FD_C_PaddleSegModelWrapper* model = FD_C_CreatePaddleSegModelWrapper(
model_file, params_file, config_file, option, PADDLE);
if (!FD_C_PaddleSegModelWrapperInitialized(model)) {
printf("Failed to initialize.\n");
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleSegModelWrapper(model);
return;
}
FD_C_Mat im = FD_C_Imread(image_file);
FD_C_SegmentationResult* result =
(FD_C_SegmentationResult*)malloc(sizeof(FD_C_SegmentationResult));
if (!FD_C_PaddleSegModelWrapperPredict(model, im, result)) {
printf("Failed to predict.\n");
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleSegModelWrapper(model);
FD_C_DestroyMat(im);
free(result);
return;
}
// print res
char res[2000];
FD_C_SegmentationResultStr(result, res);
printf("%s", res);
FD_C_Mat vis_im = FD_C_VisSegmentation(im, result, 0.5);
FD_C_Imwrite("vis_result.jpg", vis_im);
printf("Visualized result saved in ./vis_result.jpg\n");
FD_C_DestroyRuntimeOptionWrapper(option);
FD_C_DestroyPaddleSegModelWrapper(model);
FD_C_DestroySegmentationResult(result);
FD_C_DestroyMat(im);
FD_C_DestroyMat(vis_im);
}
int main(int argc, char* argv[]) {
if (argc < 4) {
printf(
"Usage: infer_demo path/to/model_dir path/to/image run_option, "
"e.g ./infer_model ./ppseg_model_dir ./test.jpeg 0"
"\n");
printf(
"The data type of run_option is int, 0: run with cpu; 1: run with gpu"
"\n");
return -1;
}
if (atoi(argv[3]) == 0) {
CpuInfer(argv[1], argv[2]);
} else if (atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -49,9 +49,20 @@ bool FastDeployModel::IsSupported(const std::vector<Backend>& backends,
<< "the backend [" << backend
<< "] is supported for current model!" << std::endl;
return true;
} else if (!enable_valid_backend_check_) {
FDWARNING << "Checking for valid backend is disable, we don't"
<< " check to see if the backend [" << backend
<< "] is supported for current model!" << std::endl;
return true;
}
return CheckBackendSupported(backends, backend);
#else
if (!enable_valid_backend_check_) {
FDWARNING << "Checking for valid backend is disable, we don't"
<< " check to see if the backend [" << backend
<< "] is supported for current model!" << std::endl;
return true;
}
return CheckBackendSupported(backends, backend);
#endif
}

View File

@@ -121,7 +121,16 @@ class FASTDEPLOY_DECL FastDeployModel {
virtual double GetProfileTime() {
return runtime_->GetProfileTime();
}
/** \brief Enable the check that the backend set by the user can be found in valid_xxx_backends.
*/
virtual void EnableValidBackendCheck() {
enable_valid_backend_check_ = true;
}
/** \brief Disable the check that the backend set by the user can be found in valid_xxx_backends.
*/
virtual void DisableValidBackendCheck() {
enable_valid_backend_check_ = false;
}
/** \brief Release reused input/output buffers
*/
virtual void ReleaseReusedBuffer() {
@@ -170,6 +179,8 @@ class FASTDEPLOY_DECL FastDeployModel {
// whether to record inference time
bool enable_record_time_of_runtime_ = false;
std::vector<double> time_of_runtime_;
// enable the check for valid backend, default true.
bool enable_valid_backend_check_ = true;
};
} // namespace fastdeploy

View File

@@ -66,6 +66,8 @@ struct PaddleBackendOption {
int mkldnn_cache_size = -1;
/// initialize memory size(MB) for GPU
int gpu_mem_init_size = 100;
/// The option to enable fixed size optimization for transformer model
bool enable_fixed_size_opt = false;
/// Disable type of operators run on TensorRT
void DisableTrtOps(const std::vector<std::string>& ops) {

View File

@@ -36,6 +36,8 @@ void BindPaddleOption(pybind11::module& m) {
BindIpuOption(m);
pybind11::class_<PaddleBackendOption>(m, "PaddleBackendOption")
.def(pybind11::init())
.def_readwrite("enable_fixed_size_opt",
&PaddleBackendOption::enable_fixed_size_opt)
.def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
.def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
.def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)

fastdeploy/runtime/backends/paddle/paddle_backend.cc Executable file → Normal file
View File

@@ -58,6 +58,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
option.trt_option.max_batch_size, 3,
precision, use_static);
SetTRTDynamicShapeToConfig(option);
if (option_.enable_fixed_size_opt) {
paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
&config_, "opt");
}
}
} else if (option.device == Device::IPU) {
#ifdef WITH_IPU
@@ -99,28 +103,36 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
}
bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) {
if (!(Supported(runtime_option.model_format, Backend::PDINFER) &&
Supported(runtime_option.device, Backend::PDINFER))) {
return false;
}
auto option = runtime_option;
option.paddle_infer_option.model_file = runtime_option.model_file;
option.paddle_infer_option.params_file = runtime_option.params_file;
option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_;
option.paddle_infer_option.model_from_memory_ =
runtime_option.model_from_memory_;
option.paddle_infer_option.device = runtime_option.device;
option.paddle_infer_option.device_id = runtime_option.device_id;
option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
option.paddle_infer_option.enable_pinned_memory =
runtime_option.enable_pinned_memory;
option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
option.paddle_infer_option.trt_option = runtime_option.trt_option;
option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
if (option.model_from_memory_) {
return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option);
return InitFromPaddle(option.model_file, option.params_file,
option.paddle_infer_option);
} else {
std::string model_buffer = "";
std::string params_buffer = "";
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str());
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str());
return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option);
FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
"Failed to read model file from %s.", option.model_file.c_str());
FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
"Failed to read parameters file from %s.",
option.params_file.c_str());
return InitFromPaddle(model_buffer, params_buffer,
option.paddle_infer_option);
}
return false;
}

fastdeploy/vision/ocr/ppocr/classifier.cc Executable file → Normal file
View File

@@ -13,6 +13,7 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
@@ -26,15 +27,16 @@ Classifier::Classifier(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::ORT,
Backend::OPENVINO};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
valid_rknpu_backends = {Backend::RKNPU2};
}
runtime_option = custom_option;
runtime_option.model_format = model_format;
@@ -54,16 +56,18 @@ bool Classifier::Initialize() {
}
std::unique_ptr<Classifier> Classifier::Clone() const {
std::unique_ptr<Classifier> clone_model = utils::make_unique<Classifier>(Classifier(*this));
std::unique_ptr<Classifier> clone_model =
utils::make_unique<Classifier>(Classifier(*this));
clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model;
}
bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score) {
bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label,
float* cls_score) {
std::vector<int32_t> cls_labels(1);
std::vector<float> cls_scores(1);
bool success = BatchPredict({img}, &cls_labels, &cls_scores);
if(!success){
if (!success) {
return success;
}
*cls_label = cls_labels[0];
@@ -72,16 +76,19 @@ bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_scor
}
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<int32_t>* cls_labels, std::vector<float>* cls_scores) {
std::vector<int32_t>* cls_labels,
std::vector<float>* cls_scores) {
return BatchPredict(images, cls_labels, cls_scores, 0, images.size());
}
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<int32_t>* cls_labels, std::vector<float>* cls_scores,
std::vector<int32_t>* cls_labels,
std::vector<float>* cls_scores,
size_t start_index, size_t end_index) {
size_t total_size = images.size();
std::vector<FDMat> fd_images = WrapMat(images);
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index)) {
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
end_index)) {
FDERROR << "Failed to preprocess the input image." << std::endl;
return false;
}
@@ -91,13 +98,15 @@ bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
return false;
}
if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, start_index, total_size)) {
FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl;
if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores,
start_index, total_size)) {
FDERROR << "Failed to postprocess the inference cls_results by runtime."
<< std::endl;
return false;
}
return true;
}
} // namesapce ocr
} // namespace ocr
} // namespace vision
} // namespace fastdeploy

fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc Executable file → Normal file
View File

@@ -13,9 +13,10 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h"
#include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
#include "fastdeploy/function/concat.h"
namespace fastdeploy {
namespace vision {
@@ -38,34 +39,43 @@ void OcrClassifierResizeImage(FDMat* mat,
Resize::Run(mat, resize_w, img_h);
}
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) {
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs) {
return Run(images, outputs, 0, images->size());
}
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs,
size_t start_index, size_t end_index) {
if (images->size() == 0 || start_index <0 || end_index <= start_index || end_index > images->size()) {
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl;
if (images->size() == 0 || start_index < 0 || end_index <= start_index ||
end_index > images->size()) {
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
"end_index <= images->size()"
<< std::endl;
return false;
}
for (size_t i = start_index; i < end_index; ++i) {
FDMat* mat = &(images->at(i));
OcrClassifierResizeImage(mat, cls_image_shape_);
Normalize::Run(mat, mean_, scale_, is_scale_);
if (!disable_normalize_) {
Normalize::Run(mat, mean_, scale_, is_scale_);
}
std::vector<float> value = {0, 0, 0};
if (mat->Width() < cls_image_shape_[2]) {
Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value);
}
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
if (!disable_permute_) {
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
}
}
// Only have 1 output Tensor.
outputs->resize(1);
// Concat all the preprocessed data to a batch tensor
size_t tensor_size = end_index - start_index;
std::vector<FDTensor> tensors(tensor_size);
std::vector<FDTensor> tensors(tensor_size);
for (size_t i = 0; i < tensor_size; ++i) {
(*images)[i + start_index].ShareWithTensor(&(tensors[i]));
tensors[i].ExpandDim(0);

View File

@@ -56,7 +56,16 @@ class FASTDEPLOY_DECL ClassifierPreprocessor {
/// Get cls_image_shape for the classification preprocess
std::vector<int> GetClsImageShape() const { return cls_image_shape_; }
/// This function will disable normalize in preprocessing step.
void DisableNormalize() { disable_normalize_ = true; }
/// This function will disable hwc2chw in preprocessing step.
void DisablePermute() { disable_permute_ = true; }
private:
// for recording the switch of hwc2chw
bool disable_permute_ = false;
// for recording the switch of normalize
bool disable_normalize_ = false;
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
bool is_scale_ = true;

View File

@@ -36,6 +36,7 @@ DBDetector::DBDetector(const std::string& model_file,
valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
valid_rknpu_backends = {Backend::RKNPU2};
}
runtime_option = custom_option;

View File

@@ -20,9 +20,13 @@ namespace fastdeploy {
namespace vision {
namespace ocr {
std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len) {
std::array<int, 4> DBDetectorPreprocessor::OcrDetectorGetInfo(
FDMat* img, int max_size_len) {
int w = img->Width();
int h = img->Height();
if (static_shape_infer_) {
return {w, h, det_image_shape_[2], det_image_shape_[1]};
}
float ratio = 1.f;
int max_wh = w >= h ? w : h;
@@ -86,7 +90,10 @@ bool DBDetectorPreprocessor::Apply(FDMatBatch* image_batch,
ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3],
max_resize_w, max_resize_h);
}
(*normalize_permute_op_)(image_batch);
if (!disable_normalize_ && !disable_permute_) {
(*normalize_permute_op_)(image_batch);
}
outputs->resize(1);
FDTensor* tensor = image_batch->Tensor();

View File

@@ -59,14 +59,44 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor : public ProcessorManager {
return &batch_det_img_info_;
}
/// This function will disable normalize in preprocessing step.
void DisableNormalize() { disable_normalize_ = true; }
/// This function will disable hwc2chw in preprocessing step.
void DisablePermute() { disable_permute_ = true; }
/// Set det_image_shape for the detection preprocess.
/// This api is usually used when you retrain the model.
/// Generally, you do not need to use it.
void SetDetImageShape(const std::vector<int>& det_image_shape) {
det_image_shape_ = det_image_shape;
}
/// Get det_image_shape for the detection preprocess
std::vector<int> GetDetImageShape() const { return det_image_shape_; }
/// Set static_shape_infer to true or false. When deploying PP-OCR
/// on hardware that does not support dynamic input shapes well,
/// like Huawei Ascend, static_shape_infer needs to be true.
void SetStaticShapeInfer(bool static_shape_infer) {
static_shape_infer_ = static_shape_infer;
}
/// Get static_shape_infer of the detection preprocess
bool GetStaticShapeInfer() const { return static_shape_infer_; }
private:
bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w,
int max_resize_h);
// for recording the switch of hwc2chw
bool disable_permute_ = false;
// for recording the switch of normalize
bool disable_normalize_ = false;
int max_side_len_ = 960;
std::vector<std::array<int, 4>> batch_det_img_info_;
std::shared_ptr<Resize> resize_op_;
std::shared_ptr<Pad> pad_op_;
std::shared_ptr<NormalizeAndPermute> normalize_permute_op_;
std::vector<int> det_image_shape_ = {3, 960, 960};
bool static_shape_infer_ = false;
std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len);
};
} // namespace ocr
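
A hedged usage sketch of the new static-shape controls on this class (the object name is illustrative, not from this diff):
```
fastdeploy::vision::ocr::DBDetectorPreprocessor preprocessor;
// Pin the detector input to a fixed CHW shape on hardware without good
// dynamic-shape support, e.g. Huawei Ascend.
preprocessor.SetStaticShapeInfer(true);
preprocessor.SetDetImageShape({3, 960, 960});
```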

View File

@@ -26,6 +26,9 @@ void BindPPOCRModel(pybind11::module& m) {
pybind11::class_<vision::ocr::DBDetectorPreprocessor>(
m, "DBDetectorPreprocessor")
.def(pybind11::init<>())
.def_property("static_shape_infer",
&vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer,
&vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer)
.def_property("max_side_len",
&vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
&vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
@@ -33,19 +36,27 @@ void BindPPOCRModel(pybind11::module& m) {
[](vision::ocr::DBDetectorPreprocessor& self,
const std::vector<float>& mean, const std::vector<float>& std,
bool is_scale) { self.SetNormalize(mean, std, is_scale); })
.def("run", [](vision::ocr::DBDetectorPreprocessor& self,
std::vector<pybind11::array>& im_list) {
std::vector<vision::FDMat> images;
for (size_t i = 0; i < im_list.size(); ++i) {
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
}
std::vector<FDTensor> outputs;
self.Run(&images, &outputs);
auto batch_det_img_info = self.GetBatchImgInfo();
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].StopSharing();
}
return std::make_pair(outputs, *batch_det_img_info);
.def("run",
[](vision::ocr::DBDetectorPreprocessor& self,
std::vector<pybind11::array>& im_list) {
std::vector<vision::FDMat> images;
for (size_t i = 0; i < im_list.size(); ++i) {
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
}
std::vector<FDTensor> outputs;
self.Run(&images, &outputs);
auto batch_det_img_info = self.GetBatchImgInfo();
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].StopSharing();
}
return std::make_pair(outputs, *batch_det_img_info);
})
.def("disable_normalize",
[](vision::ocr::DBDetectorPreprocessor& self) {
self.DisableNormalize();
})
.def("disable_permute", [](vision::ocr::DBDetectorPreprocessor& self) {
self.DisablePermute();
});
pybind11::class_<vision::ocr::DBDetectorPostprocessor>(
@@ -135,21 +146,30 @@ void BindPPOCRModel(pybind11::module& m) {
.def_property("is_scale",
&vision::ocr::ClassifierPreprocessor::GetIsScale,
&vision::ocr::ClassifierPreprocessor::SetIsScale)
.def("run", [](vision::ocr::ClassifierPreprocessor& self,
std::vector<pybind11::array>& im_list) {
std::vector<vision::FDMat> images;
for (size_t i = 0; i < im_list.size(); ++i) {
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
}
std::vector<FDTensor> outputs;
if (!self.Run(&images, &outputs)) {
throw std::runtime_error(
"Failed to preprocess the input data in ClassifierPreprocessor.");
}
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].StopSharing();
}
return outputs;
.def("run",
[](vision::ocr::ClassifierPreprocessor& self,
std::vector<pybind11::array>& im_list) {
std::vector<vision::FDMat> images;
for (size_t i = 0; i < im_list.size(); ++i) {
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
}
std::vector<FDTensor> outputs;
if (!self.Run(&images, &outputs)) {
throw std::runtime_error(
"Failed to preprocess the input data in "
"ClassifierPreprocessor.");
}
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].StopSharing();
}
return outputs;
})
.def("disable_normalize",
[](vision::ocr::ClassifierPreprocessor& self) {
self.DisableNormalize();
})
.def("disable_permute", [](vision::ocr::ClassifierPreprocessor& self) {
self.DisablePermute();
});
pybind11::class_<vision::ocr::ClassifierPostprocessor>(
@@ -229,21 +249,30 @@ void BindPPOCRModel(pybind11::module& m) {
.def_property("is_scale",
&vision::ocr::RecognizerPreprocessor::GetIsScale,
&vision::ocr::RecognizerPreprocessor::SetIsScale)
.def("run", [](vision::ocr::RecognizerPreprocessor& self,
std::vector<pybind11::array>& im_list) {
std::vector<vision::FDMat> images;
for (size_t i = 0; i < im_list.size(); ++i) {
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
}
std::vector<FDTensor> outputs;
if (!self.Run(&images, &outputs)) {
throw std::runtime_error(
"Failed to preprocess the input data in RecognizerPreprocessor.");
}
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].StopSharing();
}
return outputs;
.def("run",
[](vision::ocr::RecognizerPreprocessor& self,
std::vector<pybind11::array>& im_list) {
std::vector<vision::FDMat> images;
for (size_t i = 0; i < im_list.size(); ++i) {
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
}
std::vector<FDTensor> outputs;
if (!self.Run(&images, &outputs)) {
throw std::runtime_error(
"Failed to preprocess the input data in "
"RecognizerPreprocessor.");
}
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].StopSharing();
}
return outputs;
})
.def("disable_normalize",
[](vision::ocr::RecognizerPreprocessor& self) {
self.DisableNormalize();
})
.def("disable_permute", [](vision::ocr::RecognizerPreprocessor& self) {
self.DisablePermute();
});
pybind11::class_<vision::ocr::RecognizerPostprocessor>(

View File

@@ -13,22 +13,23 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h"
#include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
#include "fastdeploy/function/concat.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
const std::vector<int>& rec_image_shape,
bool static_shape_infer) {
int img_h, img_w;
img_h = rec_image_shape[1];
img_w = rec_image_shape[2];
if (!static_shape_infer) {
img_w = int(img_h * max_wh_ratio);
float ratio = float(mat->Width()) / float(mat->Height());
@@ -43,23 +44,29 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
} else {
if (mat->Width() >= img_w) {
Resize::Run(mat, img_w, img_h);  // Resize W to 320
} else {
Resize::Run(mat, mat->Width(), img_h);
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127});
// Pad to 320
}
}
}
}
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs) {
return Run(images, outputs, 0, images->size(), {});
}
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs,
size_t start_index, size_t end_index,
const std::vector<int>& indices) {
if (images->size() == 0 || end_index <= start_index ||
end_index > images->size()) {
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
"end_index <= images->size()"
<< std::endl;
return false;
}
@@ -67,7 +74,7 @@ bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
int img_w = rec_image_shape_[2];
float max_wh_ratio = img_w * 1.0 / img_h;
float ori_wh_ratio;
for (size_t i = start_index; i < end_index; ++i) {
size_t real_index = i;
if (indices.size() != 0) {
@@ -84,20 +91,31 @@ bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
real_index = indices[i];
}
FDMat* mat = &(images->at(real_index));
OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_,
static_shape_infer_);
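// When both steps are enabled, use the fused NormalizeAndPermute op;
// otherwise fall through and run only whichever steps remain enabled.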
if (!disable_normalize_ && !disable_permute_) {
NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
} else {
if (!disable_normalize_) {
Normalize::Run(mat, mean_, scale_, is_scale_);
}
if (!disable_permute_) {
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
}
}
}
// Only have 1 output Tensor.
outputs->resize(1);
size_t tensor_size = end_index - start_index;
// Concat all the preprocessed data to a batch tensor
std::vector<FDTensor> tensors(tensor_size);
for (size_t i = 0; i < tensor_size; ++i) {
size_t real_index = i + start_index;
if (indices.size() != 0) {
real_index = indices[i + start_index];
}
(*images)[real_index].ShareWithTensor(&(tensors[i]));
tensors[i].ExpandDim(0);
}

View File

@@ -66,7 +66,16 @@ class FASTDEPLOY_DECL RecognizerPreprocessor {
/// Get rec_image_shape for the recognition preprocess
std::vector<int> GetRecImageShape() { return rec_image_shape_; }
/// This function will disable normalize in preprocessing step.
void DisableNormalize() { disable_normalize_ = true; }
/// This function will disable hwc2chw in preprocessing step.
void DisablePermute() { disable_permute_ = true; }
private:
// for recording the switch of hwc2chw
bool disable_permute_ = false;
// for recording the switch of normalize
bool disable_normalize_ = false;
std::vector<int> rec_image_shape_ = {3, 48, 320};
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
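// Editor's note: these Disable* switches pair with the RKNPU2 backend
// support added elsewhere in this commit. When a converted model normalizes
// and transposes internally (e.g. an RKNN model built from the conversion
// configs later in this diff, whose mean/std are baked in), a hedged usage
// sketch is:
//   preprocessor.DisableNormalize();  // model normalizes on-device
//   preprocessor.DisablePermute();    // model expects HWC input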

fastdeploy/vision/ocr/ppocr/recognizer.cc (Executable file → Normal file)
View File

@@ -13,6 +13,7 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
@@ -26,17 +27,19 @@ Recognizer::Recognizer(const std::string& model_file,
const std::string& params_file,
const std::string& label_path,
const RuntimeOption& custom_option,
const ModelFormat& model_format)
: postprocessor_(label_path) {
if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
valid_rknpu_backends = {Backend::RKNPU2};
}
runtime_option = custom_option;
@@ -57,12 +60,14 @@ bool Recognizer::Initialize() {
}
std::unique_ptr<Recognizer> Recognizer::Clone() const {
std::unique_ptr<Recognizer> clone_model =
utils::make_unique<Recognizer>(Recognizer(*this));
clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model;
}
bool Recognizer::Predict(const cv::Mat& img, std::string* text,
float* rec_score) {
std::vector<std::string> texts(1);
std::vector<float> rec_scores(1);
bool success = BatchPredict({img}, &texts, &rec_scores);
@@ -75,20 +80,24 @@ bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score
}
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<std::string>* texts,
std::vector<float>* rec_scores) {
return BatchPredict(images, texts, rec_scores, 0, images.size(), {});
}
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<std::string>* texts,
std::vector<float>* rec_scores,
size_t start_index, size_t end_index,
const std::vector<int>& indices) {
size_t total_size = images.size();
if (indices.size() != 0 && indices.size() != total_size) {
FDERROR << "indices.size() should be 0 or images.size()." << std::endl;
return false;
}
std::vector<FDMat> fd_images = WrapMat(images);
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
end_index, indices)) {
FDERROR << "Failed to preprocess the input image." << std::endl;
return false;
}
@@ -99,13 +108,15 @@ bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
return false;
}
if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores,
start_index, total_size, indices)) {
FDERROR << "Failed to postprocess the inference cls_results by runtime."
<< std::endl;
return false;
}
return true;
}
} // namespace ocr
} // namespace vision
} // namespace fastdeploy

View File

@@ -22,6 +22,9 @@ namespace vision {
cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
float score_threshold, int line_size, float font_size) {
if (result.boxes.empty()) {
return im;
}
if (result.contain_masks) {
FDASSERT(result.boxes.size() == result.masks.size(),
"The size of masks must be equal to the size of boxes, but now "
@@ -106,6 +109,9 @@ cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
const std::vector<std::string>& labels,
float score_threshold, int line_size, float font_size) {
if (result.boxes.empty()) {
return im;
}
if (result.contain_masks) {
FDASSERT(result.boxes.size() == result.masks.size(),
"The size of masks must be equal to the size of boxes, but now "
@@ -203,6 +209,9 @@ cv::Mat Visualize::VisDetection(const cv::Mat& im,
const DetectionResult& result,
float score_threshold, int line_size,
float font_size) {
if (result.boxes.empty()) {
return im;
}
FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisDetection is "
"deprecated, please use fastdeploy::vision:VisDetection "
"function instead."

View File

@@ -17,10 +17,14 @@
namespace fastdeploy {
namespace vision {
cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
const float score_threshold) {
auto vis_im = im.clone();
for (int n = 0; n < ocr_result.boxes.size(); n++) {
if (ocr_result.rec_scores[n] < score_threshold) {
continue;
}
cv::Point rook_points[4];
for (int m = 0; m < 4; m++) {
@@ -28,7 +32,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
int(ocr_result.boxes[n][m * 2 + 1]));
}
const cv::Point* ppt[1] = {rook_points};
int npt[] = {4};
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
@@ -36,7 +40,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
return vis_im;
}
cv::Mat Visualize::VisOcr(const cv::Mat& im, const OCRResult& ocr_result) {
FDWARNING
<< "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, "
"please use fastdeploy::vision:VisOcr function instead."
@@ -51,7 +55,7 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
int(ocr_result.boxes[n][m * 2 + 1]));
}
const cv::Point* ppt[1] = {rook_points};
int npt[] = {4};
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}

View File

@@ -15,8 +15,8 @@
#pragma once
#include "fastdeploy/vision/common/result.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "fastdeploy/vision/tracking/pptracking/model.h"
#include "opencv2/imgproc/imgproc.hpp"
namespace fastdeploy {
/** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace
@@ -41,9 +41,10 @@ class FASTDEPLOY_DECL Visualize {
bool remove_small_connected_area = false);
static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred,
float threshold);
static cv::Mat
SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background,
const MattingResult& result,
bool remove_small_connected_area = false);
static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im,
const cv::Mat& background,
int background_label,
@@ -90,9 +91,11 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
* \param[in] font_size font size
* \return cv::Mat type stores the visualized results
*/
FASTDEPLOY_DECL cv::Mat VisClassification(const cv::Mat& im,
const ClassifyResult& result,
int top_k = 5,
float score_threshold = 0.0f,
float font_size = 0.5f);
/** \brief Show the visualized results with custom labels for classification models
*
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -103,10 +106,10 @@ FASTDEPLOY_DECL cv::Mat VisClassification(
* \param[in] font_size font size
* \return cv::Mat type stores the visualized results
*/
FASTDEPLOY_DECL cv::Mat
VisClassification(const cv::Mat& im, const ClassifyResult& result,
const std::vector<std::string>& labels, int top_k = 5,
float score_threshold = 0.0f, float font_size = 0.5f);
/** \brief Show the visualized results for face detection models
*
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -159,7 +162,8 @@ FASTDEPLOY_DECL cv::Mat VisMatting(const cv::Mat& im,
* \param[in] result the result produced by model
* \return cv::Mat type stores the visualized results
*/
FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
const float score_threshold = 0);
FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
float score_threshold = 0.0f,
@@ -172,10 +176,10 @@ FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
* \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result will not include the small connected areas
* \return cv::Mat type stores the visualized results
*/
FASTDEPLOY_DECL cv::Mat
SwapBackground(const cv::Mat& im, const cv::Mat& background,
const MattingResult& result,
bool remove_small_connected_area = false);
/** \brief Swap the image background with SegmentationResult
*
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -196,12 +200,11 @@ FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im,
* \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold
* \return cv::Mat type stores the visualized results
*/
FASTDEPLOY_DECL cv::Mat
VisKeypointDetection(const cv::Mat& im, const KeyPointDetectionResult& results,
float conf_threshold = 0.5f);
FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,
const HeadPoseResult& result, int size = 50,
int line_size = 1);
} // namespace vision
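
With the new score_threshold parameter defaulting to 0, existing callers keep drawing every box. A hedged sketch that draws only boxes whose recognition score is at least 0.5 (variable names are illustrative):
```
cv::Mat vis_im = fastdeploy::vision::VisOcr(im, ocr_result, 0.5f);
cv::imwrite("vis_result.jpg", vis_im);
```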

View File

@@ -76,10 +76,10 @@ class PPTinyPose(FastDeployModel):
"""
This function will disable normalize in preprocessing step.
"""
self._model.disable_normalize()
def disable_permute(self):
"""
This function will disable hwc2chw in preprocessing step.
"""
self._model.disable_permute()

View File

@@ -65,6 +65,29 @@ class DBDetectorPreprocessor:
"""
self._preprocessor.set_normalize(mean, std, is_scale)
@property
def static_shape_infer(self):
return self._preprocessor.static_shape_infer
@static_shape_infer.setter
def static_shape_infer(self, value):
assert isinstance(
value,
bool), "The value to set `static_shape_infer` must be type of bool."
self._preprocessor.static_shape_infer = value
def disable_normalize(self):
"""
This function will disable normalize in preprocessing step.
"""
self._preprocessor.disable_normalize()
def disable_permute(self):
"""
This function will disable hwc2chw in preprocessing step.
"""
self._preprocessor.disable_permute()
class DBDetectorPostprocessor:
def __init__(self):
@@ -358,6 +381,18 @@ class ClassifierPreprocessor:
list), "The value to set `cls_image_shape` must be type of list."
self._preprocessor.cls_image_shape = value
def disable_normalize(self):
"""
This function will disable normalize in preprocessing step.
"""
self._preprocessor.disable_normalize()
def disable_permute(self):
"""
This function will disable hwc2chw in preprocessing step.
"""
self._preprocessor.disable_permute()
class ClassifierPostprocessor:
def __init__(self):
@@ -581,6 +616,18 @@ class RecognizerPreprocessor:
list), "The value to set `rec_image_shape` must be type of list."
self._preprocessor.rec_image_shape = value
def disable_normalize(self):
"""
This function will disable normalize in preprocessing step.
"""
self._preprocessor.disable_normalize()
def disable_permute(self):
"""
This function will disable hwc2chw in preprocessing step.
"""
self._preprocessor.disable_permute()
class RecognizerPostprocessor:
def __init__(self, label_path):
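
Taken together, the new Python-side switches mirror the C++ API above. A minimal sketch, assuming a `det_model` constructed elsewhere via `fd.vision.ocr.DBDetector` whose `preprocessor` attribute exposes this class:
```
det_model.preprocessor.static_shape_infer = True  # e.g. for Huawei Ascend
det_model.preprocessor.disable_normalize()  # model normalizes internally
det_model.preprocessor.disable_permute()  # model expects HWC input
```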

View File

@@ -48,9 +48,10 @@ RUN python3 -m pip install /opt/fastdeploy/*.whl \
&& rm -rf /opt/fastdeploy/*.whl
# unset proxy
# ENV http_proxy=
# ENV https_proxy=
# RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
RUN python3 -m pip install paddlepaddle-gpu==2.4.1
COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/
COPY build/fastdeploy_install /opt/fastdeploy/

View File

@@ -1,9 +1,9 @@
English | [中文](../zh_CN/compile.md)
# FastDeploy Serving Deployment Compilation
This document describes how to create a FastDeploy image.
## Compilation with Docker containers
### GPU Image
The GPU images published by FastDeploy are based on version 21.10 of [Triton Inference Server](https://github.com/triton-inference-server/server). If developers need to use other CUDA versions, please refer to the [NVIDIA official website](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html) to modify the scripts in Dockerfile and scripts.
@@ -39,7 +39,7 @@ cd ../
docker build -t paddlepaddle/fastdeploy:1.0.3-gpu-cuda11.2-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_2 .
```
### CPU Image
```shell
# Enter the serving directory and execute the script to compile the FastDeploy and serving backend
@@ -53,7 +53,7 @@ cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 -f serving/Dockerfile_cpu .
```
### IPU Image
```shell
# Enter the serving directory and execute the script to compile the FastDeploy and serving backend
@@ -65,3 +65,7 @@ bash scripts/build_fd_ipu.sh
cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-ipu-only-21.10 -f serving/Dockerfile_ipu .
```
## Compilation without Docker containers
- [FastDeploy Serving CentOS Compilation Tutorial](./compile_without_docker_centos-en.md)

View File

@@ -0,0 +1,215 @@
English | [中文](../zh_CN/compile_without_docker_centos.md)
# FastDeploy Serving CentOS Compilation Tutorial
This tutorial introduces how to install dependencies, compile, and package FastDeploy Serving in a CentOS environment; the resulting deployment package can then be installed on a CentOS system without relying on a docker container.
If the deployment environment has `sudo` permission, you can compile and package directly in it. If you do not have `sudo` permission and cannot use `yum` to install packages, you can create a docker container that matches the deployment machine's environment, compile and package there, and finally upload the package to the deployment environment.
This tutorial is for a GPU environment. For a CPU-only environment, you can tailor it according to the content of this tutorial, mainly:
- No need for CUDA, TensorRT, datacenter-gpu-manager and other GPU dependencies
- When compiling tritonserver, remove --enable-gpu and --enable-gpu-metrics
- Disable GPU-related options such as WITH_GPU and ENABLE_TRT_BACKEND when compiling FastDeploy Runtime (a CPU-only configuration is sketched right after this list)
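A hedged sketch of the corresponding CPU-only FastDeploy Runtime configuration, derived from the GPU cmake invocation in section 5 (the remaining flags are assumptions carried over unchanged):
```
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ cmake .. \
  -DWITH_GPU=OFF \
  -DENABLE_TRT_BACKEND=OFF \
  -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install \
  -DENABLE_PADDLE_BACKEND=ON \
  -DENABLE_ORT_BACKEND=ON \
  -DENABLE_OPENVINO_BACKEND=ON \
  -DENABLE_VISION=ON \
  -DBUILD_FASTDEPLOY_PYTHON=OFF \
  -DENABLE_PADDLE2ONNX=ON \
  -DENABLE_TEXT=OFF \
  -DLIBRARY_NAME=fastdeploy_runtime
```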
## 1. Environments
* CentOS Linux release 7.9.2009
* CUDA 11.2 (consistent with the deployment env)
* Python 3.8 (prefer to use conda)
* GCC 9.4.0
## 2. Compile GCC
Follow the steps below to compile GCC 9.4.0. After `make install`, you can package the /opt/gcc-9.4.0/ directory for backup, which can be reused later.
```
wget http://gnu.mirror.constant.com/gcc/gcc-9.4.0/gcc-9.4.0.tar.gz
tar xvf gcc-9.4.0.tar.gz
cd gcc-9.4.0
mkdir build
cd build
../configure --enable-languages=c,c++ --disable-multilib --prefix=/opt/gcc-9.4.0/
make -j8
make install
```
## 3. Install dependencies for tritonserver
Dependencies which can be installed by `yum`:
```
yum install numactl-devel
yum install libarchive-devel
yum install re2-devel
wget http://www6.atomicorp.com/channels/atomic/centos/7/x86_64/RPMS/libb64-libs-1.2.1-2.1.el7.art.x86_64.rpm
wget http://www6.atomicorp.com/channels/atomic/centos/7/x86_64/RPMS/libb64-devel-1.2.1-2.1.el7.art.x86_64.rpm
rpm -ivh libb64-libs-1.2.1-2.1.el7.art.x86_64.rpm
rpm -ivh libb64-devel-1.2.1-2.1.el7.art.x86_64.rpm
```
Install rapidjson:
```
git clone https://github.com/Tencent/rapidjson.git
cd rapidjson
git submodule update --init
mkdir build && cd build
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ cmake ..
make install
```
Install boost 1.70:
```
wget https://boostorg.jfrog.io/artifactory/main/release/1.70.0/source/boost_1_70_0_rc2.tar.gz
tar xvf boost_1_70_0_rc2.tar.gz
cd boost_1_70_0
./bootstrap.sh --prefix=/opt/boost
./b2 install --prefix=/opt/boost --with=all
```
Install datacenter-gpu-manager:
```
dnf config-manager \
--add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
dnf clean expire-cache
dnf install -y datacenter-gpu-manager
```
## 4. Compile tritonserver
```
cd /workspace
git clone https://github.com/triton-inference-server/server.git -b r21.10
cd server
mkdir -p build/tritonserver/install
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ \
BOOST_LIBRARYDIR=/opt/boost/lib BOOST_INCLUDEDIR=/opt/boost/include \
python build.py \
--build-dir `pwd`/build \
--no-container-build \
--backend=ensemble \
--enable-gpu \
--endpoint=grpc \
--endpoint=http \
--enable-stats \
--enable-tracing \
--enable-logging \
--enable-stats \
--enable-metrics \
--enable-gpu-metrics \
--cmake-dir `pwd`/build \
--repo-tag=common:r21.10 \
--repo-tag=core:r21.10 \
--repo-tag=backend:r21.10 \
--repo-tag=thirdparty:r21.10 \
--backend=python:r21.10
```
## 5. Compile FastDeploy Runtime and Serving
FastDeploy Runtime depends on TensorRT for GPU serving, so TRT_DIRECTORY is required.
```
cd /workspace/
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ cmake .. \
-DENABLE_TRT_BACKEND=ON \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install \
-DWITH_GPU=ON \
-DTRT_DIRECTORY=/workspace/TensorRT-8.4.3.1 \
-DENABLE_PADDLE_BACKEND=ON \
-DENABLE_ORT_BACKEND=ON \
-DENABLE_OPENVINO_BACKEND=ON \
-DENABLE_VISION=ON \
-DBUILD_FASTDEPLOY_PYTHON=OFF \
-DENABLE_PADDLE2ONNX=ON \
-DENABLE_TEXT=OFF \
-DLIBRARY_NAME=fastdeploy_runtime
make -j8
make install
```
Compile FastDeploy Serving:
```
cd /workspace/FastDeploy/serving/
mkdir build && cd build
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ cmake .. \
-DFASTDEPLOY_DIR=/workspace/FastDeploy/build/fastdeploy_install \
-DTRITON_COMMON_REPO_TAG=r21.10 \
-DTRITON_CORE_REPO_TAG=r21.10 \
-DTRITON_BACKEND_REPO_TAG=r21.10
make -j8
```
## 6. Package
Put the executable files, scripts, dependent libraries, etc. required for Serving to run into one directory, and compress it into a tar.gz package.
```
# Put everything under /workspace/opt/
cd /workspace/
mkdir /workspace/opt
# triton server
mkdir -p opt/tritonserver
cp -r /workspace/server/build/tritonserver/install/* opt/tritonserver
# python backend
mkdir -p opt/tritonserver/backends/python
cp -r /workspace/server/build/python/install/backends/python opt/tritonserver/backends/
# fastdeploy backend
mkdir -p opt/tritonserver/backends/fastdeploy
cp /workspace/FastDeploy/serving/build/libtriton_fastdeploy.so opt/tritonserver/backends/fastdeploy/
# rename tritonserver to fastdeployserver
mv opt/tritonserver/bin/tritonserver opt/tritonserver/bin/fastdeployserver
# fastdeploy runtime
cp -r /workspace/FastDeploy/build/fastdeploy_install/ opt/fastdeploy/
# GCC
cp -r /opt/gcc-9.4.0/ opt/
```
For some dependent libraries installed by yum, if the deployment environment does not have them, they also need to be packaged together and placed under opt/third_libs, including:
* /lib64/libdcgm.so.3
* /lib64/libnuma.so.1
* /lib64/libre2.so.0
* /lib64/libb64.so.0
* /lib64/libarchive.so.13
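Before shipping, one quick way to spot libraries that would still be missing on a bare system is to run `ldd` against the server binary (a hedged sketch; the binary path assumes the layout below):
```
ldd opt/tritonserver/bin/fastdeployserver | grep "not found"
```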
The final opt/ directory structure is as follows. README.md and init.sh need to be added by the packager. README.md needs to explain how to use the installation package, etc. init.sh is responsible for setting the environment variables required for FastDeploy Serving to run.
```
opt/
├── fastdeploy
├── gcc-9.4.0
├── init.sh
├── README.md
└── tritonserver
└── third_libs
```
init.sh example:
```
CURRENT_DIR=$(dirname $(readlink -f "${BASH_SOURCE}"))
echo $CURRENT_DIR
source $CURRENT_DIR/fastdeploy/fastdeploy_init.sh
export PATH=$CURRENT_DIR/tritonserver/bin:$PATH
export LD_LIBRARY_PATH=$CURRENT_DIR/gcc-9.4.0/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CURRENT_DIR/tritonserver/backends/python/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CURRENT_DIR/third_libs:$LD_LIBRARY_PATH
unset CURRENT_DIR
```

View File

@@ -1,9 +1,9 @@
Chinese | [English](../EN/compile-en.md)
# Serving Deployment Compilation
This document describes how to build a FastDeploy image.
## Building serving deployment images
### Building the GPU image
The GPU images released by FastDeploy are built on version 21.10 of [Triton Inference Server](https://github.com/triton-inference-server/server). If another CUDA version is required, refer to the version information shown on the [NVIDIA official website](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html) and modify the Dockerfile and the scripts under scripts accordingly.
@@ -39,7 +39,7 @@ cd ../
docker build -t paddlepaddle/fastdeploy:1.0.3-gpu-cuda11.2-trt8.4-21.10 -f serving/Dockerfile_CUDA_11_2 .
```
### Building the CPU image
```
# Enter the serving directory and run the script to build fastdeploy and the serving backend
@@ -52,7 +52,7 @@ cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 -f serving/Dockerfile_cpu .
```
### Building the IPU image
```
# Enter the serving directory and run the script to build fastdeploy and the serving backend
@@ -64,3 +64,7 @@ bash scripts/build_fd_ipu.sh
cd ../
docker build -t paddlepaddle/fastdeploy:x.y.z-ipu-only-21.10 -f serving/Dockerfile_ipu .
```
## Compilation without Docker containers
- [FastDeploy Serving CentOS Compilation Tutorial](./compile_without_docker_centos.md)

View File

@@ -0,0 +1,215 @@
Chinese | [English](../EN/compile_without_docker_centos-en.md)
# FastDeploy Serving CentOS Compilation Tutorial
This tutorial introduces how to install dependencies, compile, and package FastDeploy Serving in a CentOS environment; the resulting deployment package can then be installed on a CentOS system without relying on a docker container.
If the deployment environment has sudo permission, you can compile and package directly in it. If you do not have sudo permission and cannot install with yum, you can create a docker container on a development machine that matches the deployment environment, compile and package there, and finally upload the package to the deployment environment.
This tutorial covers the GPU build of FastDeploy Serving. A CPU-only build can be tailored from it, mainly by:
- Dropping the CUDA, TensorRT, datacenter-gpu-manager and other GPU dependencies
- Removing --enable-gpu and --enable-gpu-metrics when compiling tritonserver
- Turning off WITH_GPU, ENABLE_TRT_BACKEND and other GPU-related options when compiling FastDeploy Runtime
## 1. Environments
* CentOS Linux release 7.9.2009
* CUDA 11.2 (consistent with the CUDA version of the deployment environment)
* Python 3.8 (a conda environment is recommended)
* GCC 9.4.0
## 2. Compile GCC
Compile GCC 9.4.0 with the steps below. After `make install`, the /opt/gcc-9.4.0/ directory can be archived for backup and reused later.
```
wget http://gnu.mirror.constant.com/gcc/gcc-9.4.0/gcc-9.4.0.tar.gz
tar xvf gcc-9.4.0.tar.gz
cd gcc-9.4.0
mkdir build
cd build
../configure --enable-languages=c,c++ --disable-multilib --prefix=/opt/gcc-9.4.0/
make -j8
make install
```
## 3. Install dependencies for tritonserver
Dependencies that can be installed with yum:
```
yum install numactl-devel
yum install libarchive-devel
yum install re2-devel
wget http://www6.atomicorp.com/channels/atomic/centos/7/x86_64/RPMS/libb64-libs-1.2.1-2.1.el7.art.x86_64.rpm
wget http://www6.atomicorp.com/channels/atomic/centos/7/x86_64/RPMS/libb64-devel-1.2.1-2.1.el7.art.x86_64.rpm
rpm -ivh libb64-libs-1.2.1-2.1.el7.art.x86_64.rpm
rpm -ivh libb64-devel-1.2.1-2.1.el7.art.x86_64.rpm
```
Install rapidjson:
```
git clone https://github.com/Tencent/rapidjson.git
cd rapidjson
git submodule update --init
mkdir build && cd build
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ cmake ..
make install
```
Install boost 1.70:
```
wget https://boostorg.jfrog.io/artifactory/main/release/1.70.0/source/boost_1_70_0_rc2.tar.gz
tar xvf boost_1_70_0_rc2.tar.gz
cd boost_1_70_0
./bootstrap.sh --prefix=/opt/boost
./b2 install --prefix=/opt/boost --with=all
```
Install datacenter-gpu-manager (libdcgm):
```
dnf config-manager \
--add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
dnf clean expire-cache
dnf install -y datacenter-gpu-manager
```
## 4. Compile tritonserver
```
cd /workspace
git clone https://github.com/triton-inference-server/server.git -b r21.10
cd server
mkdir -p build/tritonserver/install
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ \
BOOST_LIBRARYDIR=/opt/boost/lib BOOST_INCLUDEDIR=/opt/boost/include \
python build.py \
--build-dir `pwd`/build \
--no-container-build \
--backend=ensemble \
--enable-gpu \
--endpoint=grpc \
--endpoint=http \
--enable-stats \
--enable-tracing \
--enable-logging \
--enable-stats \
--enable-metrics \
--enable-gpu-metrics \
--cmake-dir `pwd`/build \
--repo-tag=common:r21.10 \
--repo-tag=core:r21.10 \
--repo-tag=backend:r21.10 \
--repo-tag=thirdparty:r21.10 \
--backend=python:r21.10
```
## 5. Compile FastDeploy Runtime and Serving
Compile FastDeploy Runtime. TensorRT is required here and its path must be specified (any 8.0+ version works, but it must match the CUDA version):
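To confirm the TensorRT archive actually matches the expected version before configuring, a quick hedged check (assuming the default archive layout):
```
grep "NV_TENSORRT" /workspace/TensorRT-8.4.3.1/include/NvInferVersion.h
```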
```
cd /workspace/
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ cmake .. \
-DENABLE_TRT_BACKEND=ON \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install \
-DWITH_GPU=ON \
-DTRT_DIRECTORY=/workspace/TensorRT-8.4.3.1 \
-DENABLE_PADDLE_BACKEND=ON \
-DENABLE_ORT_BACKEND=ON \
-DENABLE_OPENVINO_BACKEND=ON \
-DENABLE_VISION=ON \
-DBUILD_FASTDEPLOY_PYTHON=OFF \
-DENABLE_PADDLE2ONNX=ON \
-DENABLE_TEXT=OFF \
-DLIBRARY_NAME=fastdeploy_runtime
make -j8
make install
```
Compile Serving:
```
cd /workspace/FastDeploy/serving/
mkdir build && cd build
CC=/opt/gcc-9.4.0/bin/gcc CXX=/opt/gcc-9.4.0/bin/g++ cmake .. \
-DFASTDEPLOY_DIR=/workspace/FastDeploy/build/fastdeploy_install \
-DTRITON_COMMON_REPO_TAG=r21.10 \
-DTRITON_CORE_REPO_TAG=r21.10 \
-DTRITON_BACKEND_REPO_TAG=r21.10
make -j8
```
## 6. Package
Place the executables, scripts, dependent libraries, etc. required to run Serving in one directory and compress it into a tar.gz package.
```
# All packaged files go under /workspace/opt
cd /workspace/
mkdir /workspace/opt
# triton server
mkdir -p opt/tritonserver
cp -r /workspace/server/build/tritonserver/install/* opt/tritonserver
# python backend
mkdir -p opt/tritonserver/backends/python
cp -r /workspace/server/build/python/install/backends/python opt/tritonserver/backends/
# fastdeploy backend
mkdir -p opt/tritonserver/backends/fastdeploy
cp /workspace/FastDeploy/serving/build/libtriton_fastdeploy.so opt/tritonserver/backends/fastdeploy/
# rename tritonserver to fastdeployserver
mv opt/tritonserver/bin/tritonserver opt/tritonserver/bin/fastdeployserver
# fastdeploy runtime
cp -r /workspace/FastDeploy/build/fastdeploy_install/ opt/fastdeploy/
# GCC
cp -r /opt/gcc-9.4.0/ opt/
```
For dependencies installed with yum, if the deployment environment lacks them, package them as well and place them under opt/third_libs, including:
* /lib64/libdcgm.so.3
* /lib64/libnuma.so.1
* /lib64/libre2.so.0
* /lib64/libb64.so.0
* /lib64/libarchive.so.13
The final opt/ directory layout is as follows. README.md and init.sh are added by the packager: README.md explains how to use the package, and init.sh sets the environment variables required to run FastDeploy Serving:
```
opt/
├── fastdeploy
├── gcc-9.4.0
├── init.sh
├── README.md
└── tritonserver
└── third_libs
```
init.sh example:
```
CURRENT_DIR=$(dirname $(readlink -f "${BASH_SOURCE}"))
echo $CURRENT_DIR
source $CURRENT_DIR/fastdeploy/fastdeploy_init.sh
export PATH=$CURRENT_DIR/tritonserver/bin:$PATH
export LD_LIBRARY_PATH=$CURRENT_DIR/gcc-9.4.0/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CURRENT_DIR/tritonserver/backends/python/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CURRENT_DIR/third_libs:$LD_LIBRARY_PATH
unset CURRENT_DIR
```

View File

@@ -0,0 +1,15 @@
mean:
-
- 127.5
- 127.5
- 127.5
std:
-
- 127.5
- 127.5
- 127.5
model_path: ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx
outputs_nodes:
do_quantization: False
dataset:
output_folder: "./ch_ppocr_mobile_v2.0_cls_infer"

View File

@@ -0,0 +1,15 @@
mean:
-
- 123.675
- 116.28
- 103.53
std:
-
- 58.395
- 57.12
- 57.375
model_path: ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx
outputs_nodes:
do_quantization: False
dataset:
output_folder: "./ch_PP-OCRv3_det_infer"

View File

@@ -0,0 +1,15 @@
mean:
-
- 127.5
- 127.5
- 127.5
std:
-
- 127.5
- 127.5
- 127.5
model_path: ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx
outputs_nodes:
do_quantization: False
dataset:
output_folder: "./ch_PP-OCRv3_rec_infer"

View File

@@ -65,7 +65,10 @@ if __name__ == "__main__":
if not os.path.exists(yaml_config["output_folder"]):
os.mkdir(yaml_config["output_folder"])
model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0]
name_list = os.path.basename(yaml_config["model_path"]).split(".")
model_base_name = ""
for name in name_list[0:-1]:
model_base_name += name
model_device_name = config.target_platform.lower()
if yaml_config["do_quantization"]:
model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn"
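For example, if target_platform were rk3588 and do_quantization were True, ch_PP-OCRv3_rec_infer.onnx from the config above would be saved as ch_PP-OCRv3_rec_infer_rk3588_quantized.rknn (the platform value is an assumption; the script simply lowercases whatever config.target_platform holds).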