From 22ca63982be6468ebce6335a4d6aa9c16f70c3f1 Mon Sep 17 00:00:00 2001
From: Jason <jiangjiajun@baidu.com>
Date: Wed, 10 Aug 2022 10:50:22 +0800
Subject: [PATCH] Refine code structure (#89)

* refine code structure

* refine code structure
---
 CMakeLists.txt                                |   35 +-
 csrcs/fastdeploy/CMakeLists.txt               |    0
 csrcs/fastdeploy/backends/backend.h           |   49 -
 .../backends/common/multiclass_nms.cc         |  224 -
 .../backends/common/multiclass_nms.h          |   45 -
 .../backends/ort/ops/multiclass_nms.cc        |  261 --
 .../backends/ort/ops/multiclass_nms.h         |   81 -
 csrcs/fastdeploy/backends/ort/ort_backend.cc  |  279 --
 csrcs/fastdeploy/backends/ort/ort_backend.h   |   93 -
 csrcs/fastdeploy/backends/ort/utils.cc        |   67 -
 csrcs/fastdeploy/backends/ort/utils.h         |   39 -
 .../backends/paddle/paddle_backend.cc         |  105 -
 .../backends/paddle/paddle_backend.h          |   78 -
 csrcs/fastdeploy/backends/paddle/util.cc      |   76 -
 .../backends/tensorrt/common/BatchStream.h    |  342 --
 .../backends/tensorrt/common/CPPLINT.cfg      |    1 -
 .../tensorrt/common/EntropyCalibrator.h       |  118 -
 .../backends/tensorrt/common/ErrorRecorder.h  |  115 -
 .../backends/tensorrt/common/README.md        |    1 -
 .../backends/tensorrt/common/argsParser.h     |  169 -
 .../backends/tensorrt/common/buffers.h        |  426 --
 .../backends/tensorrt/common/common.h         |  844 ----
 .../backends/tensorrt/common/getOptions.cpp   |  223 -
 .../backends/tensorrt/common/getOptions.h     |  128 -
 .../backends/tensorrt/common/half.h           | 3787 -----------------
 .../backends/tensorrt/common/logger.cpp       |   38 -
 .../backends/tensorrt/common/logger.h         |   35 -
 .../backends/tensorrt/common/logging.h        |  573 ---
 .../tensorrt/common/parserOnnxConfig.h        |  126 -
 .../backends/tensorrt/common/safeCommon.h     |   65 -
 .../backends/tensorrt/common/sampleConfig.h   |  251 --
 .../backends/tensorrt/common/sampleDevice.h   |  397 --
 .../tensorrt/common/sampleEngines.cpp         | 1710 --------
 .../backends/tensorrt/common/sampleEngines.h  |  195 -
 .../tensorrt/common/sampleInference.cpp       |  943 ----
 .../tensorrt/common/sampleInference.h         |   88 -
 .../tensorrt/common/sampleOptions.cpp         | 1634 -------
 .../backends/tensorrt/common/sampleOptions.h  |  311 --
 .../tensorrt/common/sampleReporting.cpp       |  480 ---
 .../tensorrt/common/sampleReporting.h         |  211 -
 .../backends/tensorrt/common/sampleUtils.h    |  494 ---
 .../backends/tensorrt/common/windows/getopt.c |  568 ---
 .../backends/tensorrt/common/windows/getopt.h |  124 -
 .../backends/tensorrt/trt_backend.cc          |  528 ---
 .../backends/tensorrt/trt_backend.h           |  113 -
 csrcs/fastdeploy/core/config.h.in             |   54 -
 csrcs/fastdeploy/core/fd_tensor.cc            |  134 -
 csrcs/fastdeploy/core/fd_tensor.h             |   87 -
 csrcs/fastdeploy/core/fd_type.cc              |  123 -
 csrcs/fastdeploy/core/fd_type.h               |   63 -
 csrcs/fastdeploy/fastdeploy_model.cc          |  145 -
 csrcs/fastdeploy/fastdeploy_model.h           |   67 -
 csrcs/fastdeploy/fastdeploy_runtime.cc        |  365 --
 csrcs/fastdeploy/fastdeploy_runtime.h         |  159 -
 csrcs/fastdeploy/function/eigen.cc            |   32 -
 csrcs/fastdeploy/function/eigen.h             |  109 -
 csrcs/fastdeploy/function/reduce.cc           |  246 --
 csrcs/fastdeploy/function/reduce.h            |  100 -
 csrcs/fastdeploy/function/reduce_functor.h    |   76 -
 csrcs/fastdeploy/pybind/fastdeploy_model.cc   |   35 -
 csrcs/fastdeploy/pybind/fastdeploy_runtime.cc |  134 -
 csrcs/fastdeploy/pybind/main.cc.in            |  127 -
 csrcs/fastdeploy/pybind/main.h                |   90 -
 csrcs/fastdeploy/text.h                       |   19 -
 csrcs/fastdeploy/text/common/option.h         |   26 -
 csrcs/fastdeploy/text/common/result.cc        |   18 -
 csrcs/fastdeploy/text/common/result.h         |   23 -
 .../text/postprocessor/postprocessor.cc       |   31 -
 .../text/postprocessor/postprocessor.h        |   34 -
 .../text/preprocessor/preprocessor.cc         |   32 -
 .../text/preprocessor/preprocessor.h          |   34 -
 csrcs/fastdeploy/text/text_model.cc           |   79 -
 csrcs/fastdeploy/text/text_model.h            |   51 -
 csrcs/fastdeploy/text/text_pybind.cc          |   13 -
 csrcs/fastdeploy/utils/perf.h                 |   49 -
 csrcs/fastdeploy/utils/unique_ptr.h           |   58 -
 csrcs/fastdeploy/utils/utils.cc               |   49 -
 csrcs/fastdeploy/utils/utils.h                |  150 -
 csrcs/fastdeploy/vision.h                     |   41 -
 csrcs/fastdeploy/vision/AddModel.md           |    3 -
 .../vision/common/processors/base.cc          |   61 -
 .../vision/common/processors/base.h           |   48 -
 .../vision/common/processors/cast.cc          |   64 -
 .../vision/common/processors/cast.h           |   37 -
 .../vision/common/processors/center_crop.cc   |   63 -
 .../vision/common/processors/center_crop.h    |   40 -
 .../common/processors/color_space_convert.cc  |   58 -
 .../common/processors/color_space_convert.h   |   44 -
 .../vision/common/processors/convert.cc       |   62 -
 .../vision/common/processors/convert.h        |   42 -
 .../vision/common/processors/hwc2chw.cc       |   75 -
 .../vision/common/processors/hwc2chw.h        |   33 -
 .../vision/common/processors/mat.cc           |  117 -
 .../fastdeploy/vision/common/processors/mat.h |   80 -
 .../vision/common/processors/normalize.cc     |   88 -
 .../vision/common/processors/normalize.h      |   53 -
 .../vision/common/processors/pad.cc           |  100 -
 .../fastdeploy/vision/common/processors/pad.h |   50 -
 .../vision/common/processors/pad_to_size.cc   |  141 -
 .../vision/common/processors/pad_to_size.h    |   46 -
 .../vision/common/processors/resize.cc        |   90 -
 .../vision/common/processors/resize.h         |   63 -
 .../common/processors/resize_by_short.cc      |   76 -
 .../common/processors/resize_by_short.h       |   49 -
 .../vision/common/processors/stride_pad.cc    |  124 -
 .../vision/common/processors/stride_pad.h     |   44 -
 .../vision/common/processors/transform.h      |   27 -
 csrcs/fastdeploy/vision/common/result.cc      |  306 --
 csrcs/fastdeploy/vision/common/result.h       |  148 -
 .../vision/detection/contrib/nanodet_plus.cc  |  355 --
 .../vision/detection/contrib/nanodet_plus.h   |  101 -
 .../detection/contrib/nanodet_plus_pybind.cc  |   39 -
 .../vision/detection/contrib/scaledyolov4.cc  |  255 --
 .../vision/detection/contrib/scaledyolov4.h   |  103 -
 .../detection/contrib/scaledyolov4_pybind.cc  |   41 -
 .../vision/detection/contrib/yolor.cc         |  253 --
 .../vision/detection/contrib/yolor.h          |  102 -
 .../vision/detection/contrib/yolor_pybind.cc  |   37 -
 .../vision/detection/contrib/yolov5.cc        |  295 --
 .../vision/detection/contrib/yolov5.h         |  108 -
 .../vision/detection/contrib/yolov5_pybind.cc |   38 -
 .../vision/detection/contrib/yolov5lite.cc    |  399 --
 .../vision/detection/contrib/yolov5lite.h     |  138 -
 .../detection/contrib/yolov5lite_pybind.cc    |   43 -
 .../vision/detection/contrib/yolov6.cc        |  267 --
 .../vision/detection/contrib/yolov6.h         |  108 -
 .../vision/detection/contrib/yolov6_pybind.cc |   37 -
 .../vision/detection/contrib/yolov7.cc        |  253 --
 .../vision/detection/contrib/yolov7.h         |  100 -
 .../vision/detection/contrib/yolov7_pybind.cc |   37 -
 .../vision/detection/contrib/yolox.cc         |  339 --
 .../vision/detection/contrib/yolox.h          |  107 -
 .../vision/detection/contrib/yolox_pybind.cc  |   37 -
 .../vision/detection/detection_pybind.cc      |   42 -
 .../fastdeploy/vision/detection/ppdet/model.h |   21 -
 .../vision/detection/ppdet/picodet.cc         |   66 -
 .../vision/detection/ppdet/picodet.h          |   36 -
 .../vision/detection/ppdet/ppdet_pybind.cc    |   95 -
 .../vision/detection/ppdet/ppyolo.cc          |   78 -
 .../vision/detection/ppdet/ppyolo.h           |   51 -
 .../vision/detection/ppdet/ppyoloe.cc         |  258 --
 .../vision/detection/ppdet/ppyoloe.h          |   68 -
 .../fastdeploy/vision/detection/ppdet/rcnn.cc |   84 -
 .../fastdeploy/vision/detection/ppdet/rcnn.h  |   39 -
 .../vision/detection/ppdet/yolov3.cc          |   64 -
 .../vision/detection/ppdet/yolov3.h           |   35 -
 .../vision/detection/ppdet/yolox.cc           |   72 -
 .../fastdeploy/vision/detection/ppdet/yolox.h |   35 -
 .../vision/facedet/contrib/retinaface.cc      |  310 --
 .../vision/facedet/contrib/retinaface.h       |   92 -
 .../facedet/contrib/retinaface_pybind.cc      |   38 -
 .../vision/facedet/contrib/scrfd.cc           |  369 --
 .../fastdeploy/vision/facedet/contrib/scrfd.h |  122 -
 .../vision/facedet/contrib/scrfd_pybind.cc    |   45 -
 .../vision/facedet/contrib/ultraface.cc       |  221 -
 .../vision/facedet/contrib/ultraface.h        |   84 -
 .../facedet/contrib/ultraface_pybind.cc       |   31 -
 .../vision/facedet/contrib/yolov5face.cc      |  294 --
 .../vision/facedet/contrib/yolov5face.h       |   97 -
 .../facedet/contrib/yolov5face_pybind.cc      |   41 -
 .../vision/facedet/facedet_pybind.cc          |   31 -
 .../vision/faceid/contrib/arcface.cc          |   83 -
 .../vision/faceid/contrib/arcface.h           |   65 -
 .../vision/faceid/contrib/arcface_pybind.cc   |   37 -
 .../vision/faceid/contrib/cosface.cc          |   83 -
 .../vision/faceid/contrib/cosface.h           |   66 -
 .../vision/faceid/contrib/cosface_pybind.cc   |   37 -
 .../vision/faceid/contrib/insightface_rec.cc  |  153 -
 .../vision/faceid/contrib/insightface_rec.h   |   72 -
 .../faceid/contrib/insightface_rec_pybind.cc  |   42 -
 .../vision/faceid/contrib/partial_fc.cc       |   84 -
 .../vision/faceid/contrib/partial_fc.h        |   64 -
 .../faceid/contrib/partial_fc_pybind.cc       |   37 -
 csrcs/fastdeploy/vision/faceid/contrib/vpl.cc |   82 -
 csrcs/fastdeploy/vision/faceid/contrib/vpl.h  |   65 -
 .../vision/faceid/contrib/vpl_pybind.cc       |   37 -
 .../fastdeploy/vision/faceid/faceid_pybind.cc |   33 -
 .../vision/matting/contrib/modnet.cc          |  175 -
 .../vision/matting/contrib/modnet.h           |   70 -
 .../vision/matting/contrib/modnet_pybind.cc   |   35 -
 .../vision/matting/matting_pybind.cc          |   26 -
 csrcs/fastdeploy/vision/ppcls/model.cc        |  153 -
 csrcs/fastdeploy/vision/ppcls/model.h         |   51 -
 csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc |   30 -
 csrcs/fastdeploy/vision/ppseg/model.cc        |  232 -
 csrcs/fastdeploy/vision/ppseg/model.h         |   43 -
 csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc |   35 -
 .../fastdeploy/vision/utils/FDTensor2CVMat.cc |   59 -
 .../vision/utils/cosine_similarity.cc         |   49 -
 csrcs/fastdeploy/vision/utils/l2_normalize.cc |   41 -
 csrcs/fastdeploy/vision/utils/nms.cc          |  127 -
 csrcs/fastdeploy/vision/utils/sort_det_res.cc |   81 -
 .../vision/utils/sort_face_det_res.cc         |   69 -
 csrcs/fastdeploy/vision/utils/utils.h         |  140 -
 csrcs/fastdeploy/vision/vision_pybind.cc      |   93 -
 .../fastdeploy/vision/visualize/detection.cc  |   64 -
 .../vision/visualize/face_detection.cc        |   84 -
 .../vision/visualize/matting_alpha.cc         |  119 -
 .../vision/visualize/segmentation.cc          |   46 -
 .../fastdeploy/vision/visualize/visualize.cc  |   47 -
 csrcs/fastdeploy/vision/visualize/visualize.h |   41 -
 .../vision/visualize/visualize_pybind.cc      |   62 -
 examples/.gitignore                           |    8 -
 examples/CMakeLists.txt                       |   50 -
 examples/resources/.gitignore                 |   15 -
 examples/resources/images/.gitignore          |    3 -
 examples/resources/models/.gitignore          |    5 -
 examples/resources/outputs/.gitignore         |    3 -
 examples/text/ernie_tokencls.cc               |  225 -
 examples/vision/biubug6_retinaface.cc         |   55 -
 examples/vision/deepcam_yolov5face.cc         |   53 -
 examples/vision/deepinsight_arcface.cc        |   64 -
 examples/vision/deepinsight_cosface.cc        |   64 -
 .../vision/deepinsight_insightface_rec.cc     |   64 -
 examples/vision/deepinsight_partial_fc.cc     |   64 -
 examples/vision/deepinsight_scrfd.cc          |   51 -
 examples/vision/deepinsight_vpl.cc            |   64 -
 .../vision/detection/README.md                |    0
 .../vision/detection/yolov7/README.md         |    0
 .../detection/yolov7/cpp/CMakeLists.txt       |    0
 .../vision/detection/yolov7/cpp/README.md     |    0
 .../vision/detection/yolov7/cpp/infer.cc      |    0
 .../vision/detection/yolov7/python/README.md  |    0
 .../vision/detection/yolov7/python/infer.py   |    0
 examples/vision/linzaer_ultraface.cc          |   53 -
 examples/vision/megvii_yolox.cc               |   52 -
 examples/vision/meituan_yolov6.cc             |   52 -
 examples/vision/ppdet_ppyoloe.cc              |   51 -
 examples/vision/ppogg_yolov5lite.cc           |   52 -
 examples/vision/ppseg_unet.cc                 |   59 -
 examples/vision/rangilyu_nanodet_plus.cc      |   53 -
 examples/vision/ultralytics_yolov5.cc         |   52 -
 examples/vision/wongkinyiu_scaledyolov4.cc    |   52 -
 examples/vision/wongkinyiu_yolor.cc           |   52 -
 examples/vision/wongkinyiu_yolov7.cc          |   52 -
 examples/vision/zhkkke_modnet.cc              |   58 -
 model_zoo/.gitignore                          |   12 -
 model_zoo/text/ernie-3.0/README.md            |  238 --
 model_zoo/text/ernie-3.0/ernie_predictor.py   |  242 --
 model_zoo/text/ernie-3.0/infer_cpu.py         |   78 -
 model_zoo/text/ernie-3.0/infer_gpu.py         |   84 -
 model_zoo/text/ernie-3.0/requirements_cpu.txt |    3 -
 model_zoo/text/ernie-3.0/requirements_gpu.txt |    4 -
 model_zoo/vision/arcface/README.md            |   80 -
 model_zoo/vision/arcface/api.md               |  113 -
 model_zoo/vision/arcface/arcface.py           |   46 -
 model_zoo/vision/arcface/cpp/CMakeLists.txt   |   17 -
 model_zoo/vision/arcface/cpp/README.md        |   61 -
 model_zoo/vision/arcface/cpp/arcface.cc       |   64 -
 model_zoo/vision/modnet/README.md             |   67 -
 model_zoo/vision/modnet/api.md                |   72 -
 model_zoo/vision/modnet/cpp/CMakeLists.txt    |   17 -
 model_zoo/vision/modnet/cpp/README.md         |   49 -
 model_zoo/vision/modnet/cpp/modnet.cc         |   57 -
 model_zoo/vision/modnet/modnet.py             |   22 -
 model_zoo/vision/nanodet_plus/README.md       |   46 -
 model_zoo/vision/nanodet_plus/api.md          |   71 -
 .../vision/nanodet_plus/cpp/CMakeLists.txt    |   17 -
 model_zoo/vision/nanodet_plus/cpp/README.md   |   30 -
 .../vision/nanodet_plus/cpp/nanodet_plus.cc   |   40 -
 model_zoo/vision/nanodet_plus/nanodet_plus.py |   23 -
 model_zoo/vision/ppseg/ppseg_unet.py          |   26 -
 model_zoo/vision/retinaface/README.md         |   76 -
 model_zoo/vision/retinaface/api.md            |   71 -
 .../vision/retinaface/cpp/CMakeLists.txt      |   17 -
 model_zoo/vision/retinaface/cpp/README.md     |   61 -
 model_zoo/vision/retinaface/cpp/retinaface.cc |   49 -
 model_zoo/vision/retinaface/retinaface.py     |   24 -
 model_zoo/vision/scaledyolov4/README.md       |   66 -
 model_zoo/vision/scaledyolov4/api.md          |   71 -
 .../vision/scaledyolov4/cpp/CMakeLists.txt    |   17 -
 model_zoo/vision/scaledyolov4/cpp/README.md   |   53 -
 .../vision/scaledyolov4/cpp/scaledyolov4.cc   |   40 -
 .../vision/scaledyolov4/scaled_yolov4.py      |   21 -
 model_zoo/vision/scrfd/README.md              |   92 -
 model_zoo/vision/scrfd/api.md                 |   71 -
 model_zoo/vision/scrfd/cpp/CMakeLists.txt     |   17 -
 model_zoo/vision/scrfd/cpp/README.md          |   76 -
 model_zoo/vision/scrfd/cpp/scrfd.cc           |   44 -
 model_zoo/vision/scrfd/scrfd.py               |   25 -
 model_zoo/vision/ultraface/README.md          |   49 -
 model_zoo/vision/ultraface/api.md             |   71 -
 model_zoo/vision/ultraface/cpp/CMakeLists.txt |   17 -
 model_zoo/vision/ultraface/cpp/README.md      |   36 -
 model_zoo/vision/ultraface/cpp/ultraface.cc   |   48 -
 model_zoo/vision/ultraface/ultraface.py       |   23 -
 model_zoo/vision/yolor/README.md              |   66 -
 model_zoo/vision/yolor/api.md                 |   71 -
 model_zoo/vision/yolor/cpp/CMakeLists.txt     |   17 -
 model_zoo/vision/yolor/cpp/README.md          |   53 -
 model_zoo/vision/yolor/cpp/yolor.cc           |   40 -
 model_zoo/vision/yolor/yolor.py               |   21 -
 model_zoo/vision/yolov5/README.md             |   47 -
 model_zoo/vision/yolov5/api.md                |   71 -
 model_zoo/vision/yolov5/cpp/CMakeLists.txt    |   18 -
 model_zoo/vision/yolov5/cpp/README.md         |   31 -
 model_zoo/vision/yolov5/cpp/yolov5.cc         |   40 -
 model_zoo/vision/yolov5/yolov5.py             |   23 -
 model_zoo/vision/yolov5face/README.md         |   78 -
 model_zoo/vision/yolov5face/api.md            |   71 -
 .../vision/yolov5face/cpp/CMakeLists.txt      |   17 -
 model_zoo/vision/yolov5face/cpp/README.md     |   60 -
 model_zoo/vision/yolov5face/cpp/yolov5face.cc |   40 -
 model_zoo/vision/yolov5face/yolov5face.py     |   17 -
 model_zoo/vision/yolov5lite/README.md         |  130 -
 model_zoo/vision/yolov5lite/api.md            |   71 -
 .../vision/yolov5lite/cpp/CMakeLists.txt      |   17 -
 model_zoo/vision/yolov5lite/cpp/README.md     |  117 -
 model_zoo/vision/yolov5lite/cpp/yolov5lite.cc |   42 -
 model_zoo/vision/yolov5lite/yolov5lite.py     |   24 -
 model_zoo/vision/yolov6/README.md             |   47 -
 model_zoo/vision/yolov6/api.md                |   71 -
 model_zoo/vision/yolov6/cpp/CMakeLists.txt    |   17 -
 model_zoo/vision/yolov6/cpp/README.md         |   31 -
 model_zoo/vision/yolov6/cpp/yolov6.cc         |   40 -
 model_zoo/vision/yolov6/yolov6.py             |   23 -
 model_zoo/vision/yolov7/README.md             |   70 -
 model_zoo/vision/yolov7/api.md                |   71 -
 model_zoo/vision/yolov7/cpp/CMakeLists.txt    |   17 -
 model_zoo/vision/yolov7/cpp/README.md         |   53 -
 model_zoo/vision/yolov7/cpp/yolov7.cc         |   40 -
 model_zoo/vision/yolov7/yolov7.py             |   21 -
 model_zoo/vision/yolox/README.md              |   47 -
 model_zoo/vision/yolox/api.md                 |   71 -
 model_zoo/vision/yolox/cpp/CMakeLists.txt     |   17 -
 model_zoo/vision/yolox/cpp/README.md          |   31 -
 model_zoo/vision/yolox/cpp/yolox.cc           |   40 -
 model_zoo/vision/yolox/yolox.py               |   22 -
 sdk_mannager/fastdeploy/__init__.py           |  230 -
 sdk_mannager/fastdeploy/__main__.py           |   18 -
 sdk_mannager/fastdeploy/download.py           |  186 -
 sdk_mannager/requirements.txt                 |    2 -
 sdk_mannager/setup.py                         |   36 -
 333 files changed, 1 insertion(+), 37500 deletions(-)
 delete mode 100644 csrcs/fastdeploy/CMakeLists.txt
 delete mode 100644 csrcs/fastdeploy/backends/backend.h
 delete mode 100644 csrcs/fastdeploy/backends/common/multiclass_nms.cc
 delete mode 100644 csrcs/fastdeploy/backends/common/multiclass_nms.h
 delete mode 100644 csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc
 delete mode 100644 csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h
 delete mode 100644 csrcs/fastdeploy/backends/ort/ort_backend.cc
 delete mode 100644 csrcs/fastdeploy/backends/ort/ort_backend.h
 delete mode 100644 csrcs/fastdeploy/backends/ort/utils.cc
 delete mode 100644 csrcs/fastdeploy/backends/ort/utils.h
 delete mode 100644 csrcs/fastdeploy/backends/paddle/paddle_backend.cc
 delete mode 100644 csrcs/fastdeploy/backends/paddle/paddle_backend.h
 delete mode 100644 csrcs/fastdeploy/backends/paddle/util.cc
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/README.md
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/argsParser.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/buffers.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/common.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/getOptions.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/half.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/logger.cpp
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/logger.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/logging.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/trt_backend.cc
 delete mode 100644 csrcs/fastdeploy/backends/tensorrt/trt_backend.h
 delete mode 100644 csrcs/fastdeploy/core/config.h.in
 delete mode 100644 csrcs/fastdeploy/core/fd_tensor.cc
 delete mode 100644 csrcs/fastdeploy/core/fd_tensor.h
 delete mode 100644 csrcs/fastdeploy/core/fd_type.cc
 delete mode 100644 csrcs/fastdeploy/core/fd_type.h
 delete mode 100644 csrcs/fastdeploy/fastdeploy_model.cc
 delete mode 100644 csrcs/fastdeploy/fastdeploy_model.h
 delete mode 100644 csrcs/fastdeploy/fastdeploy_runtime.cc
 delete mode 100644 csrcs/fastdeploy/fastdeploy_runtime.h
 delete mode 100644 csrcs/fastdeploy/function/eigen.cc
 delete mode 100644 csrcs/fastdeploy/function/eigen.h
 delete mode 100644 csrcs/fastdeploy/function/reduce.cc
 delete mode 100644 csrcs/fastdeploy/function/reduce.h
 delete mode 100644 csrcs/fastdeploy/function/reduce_functor.h
 delete mode 100644 csrcs/fastdeploy/pybind/fastdeploy_model.cc
 delete mode 100644 csrcs/fastdeploy/pybind/fastdeploy_runtime.cc
 delete mode 100644 csrcs/fastdeploy/pybind/main.cc.in
 delete mode 100644 csrcs/fastdeploy/pybind/main.h
 delete mode 100644 csrcs/fastdeploy/text.h
 delete mode 100644 csrcs/fastdeploy/text/common/option.h
 delete mode 100644 csrcs/fastdeploy/text/common/result.cc
 delete mode 100644 csrcs/fastdeploy/text/common/result.h
 delete mode 100644 csrcs/fastdeploy/text/postprocessor/postprocessor.cc
 delete mode 100644 csrcs/fastdeploy/text/postprocessor/postprocessor.h
 delete mode 100644 csrcs/fastdeploy/text/preprocessor/preprocessor.cc
 delete mode 100644 csrcs/fastdeploy/text/preprocessor/preprocessor.h
 delete mode 100644 csrcs/fastdeploy/text/text_model.cc
 delete mode 100644 csrcs/fastdeploy/text/text_model.h
 delete mode 100644 csrcs/fastdeploy/text/text_pybind.cc
 delete mode 100644 csrcs/fastdeploy/utils/perf.h
 delete mode 100644 csrcs/fastdeploy/utils/unique_ptr.h
 delete mode 100644 csrcs/fastdeploy/utils/utils.cc
 delete mode 100644 csrcs/fastdeploy/utils/utils.h
 delete mode 100644 csrcs/fastdeploy/vision.h
 delete mode 100644 csrcs/fastdeploy/vision/AddModel.md
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/base.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/base.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/cast.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/cast.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/center_crop.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/center_crop.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/color_space_convert.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/color_space_convert.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/convert.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/convert.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/hwc2chw.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/hwc2chw.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/mat.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/mat.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/normalize.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/normalize.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad_to_size.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad_to_size.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize_by_short.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize_by_short.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/stride_pad.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/stride_pad.h
 delete mode 100644 csrcs/fastdeploy/vision/common/processors/transform.h
 delete mode 100644 csrcs/fastdeploy/vision/common/result.cc
 delete mode 100644 csrcs/fastdeploy/vision/common/result.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolor.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolor.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov6.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov6.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov7.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov7.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolox.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolox.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/detection_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/model.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/picodet.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/picodet.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/rcnn.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolov3.h
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolox.cc
 delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolox.h
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/retinaface.h
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/scrfd.h
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/ultraface.h
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h
 delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/facedet/facedet_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/arcface.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/arcface.h
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/cosface.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/cosface.h
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/vpl.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/vpl.h
 delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/faceid/faceid_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/matting/contrib/modnet.cc
 delete mode 100644 csrcs/fastdeploy/vision/matting/contrib/modnet.h
 delete mode 100644 csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/matting/matting_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/ppcls/model.cc
 delete mode 100644 csrcs/fastdeploy/vision/ppcls/model.h
 delete mode 100644 csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/ppseg/model.cc
 delete mode 100644 csrcs/fastdeploy/vision/ppseg/model.h
 delete mode 100644 csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc
 delete mode 100644 csrcs/fastdeploy/vision/utils/cosine_similarity.cc
 delete mode 100644 csrcs/fastdeploy/vision/utils/l2_normalize.cc
 delete mode 100644 csrcs/fastdeploy/vision/utils/nms.cc
 delete mode 100644 csrcs/fastdeploy/vision/utils/sort_det_res.cc
 delete mode 100644 csrcs/fastdeploy/vision/utils/sort_face_det_res.cc
 delete mode 100644 csrcs/fastdeploy/vision/utils/utils.h
 delete mode 100644 csrcs/fastdeploy/vision/vision_pybind.cc
 delete mode 100644 csrcs/fastdeploy/vision/visualize/detection.cc
 delete mode 100644 csrcs/fastdeploy/vision/visualize/face_detection.cc
 delete mode 100644 csrcs/fastdeploy/vision/visualize/matting_alpha.cc
 delete mode 100644 csrcs/fastdeploy/vision/visualize/segmentation.cc
 delete mode 100644 csrcs/fastdeploy/vision/visualize/visualize.cc
 delete mode 100644 csrcs/fastdeploy/vision/visualize/visualize.h
 delete mode 100644 csrcs/fastdeploy/vision/visualize/visualize_pybind.cc
 delete mode 100644 examples/.gitignore
 delete mode 100644 examples/CMakeLists.txt
 delete mode 100644 examples/resources/.gitignore
 delete mode 100644 examples/resources/images/.gitignore
 delete mode 100644 examples/resources/models/.gitignore
 delete mode 100644 examples/resources/outputs/.gitignore
 delete mode 100644 examples/text/ernie_tokencls.cc
 delete mode 100644 examples/vision/biubug6_retinaface.cc
 delete mode 100644 examples/vision/deepcam_yolov5face.cc
 delete mode 100644 examples/vision/deepinsight_arcface.cc
 delete mode 100644 examples/vision/deepinsight_cosface.cc
 delete mode 100644 examples/vision/deepinsight_insightface_rec.cc
 delete mode 100644 examples/vision/deepinsight_partial_fc.cc
 delete mode 100644 examples/vision/deepinsight_scrfd.cc
 delete mode 100644 examples/vision/deepinsight_vpl.cc
 rename {new_examples => examples}/vision/detection/README.md (100%)
 rename {new_examples => examples}/vision/detection/yolov7/README.md (100%)
 rename {new_examples => examples}/vision/detection/yolov7/cpp/CMakeLists.txt (100%)
 rename {new_examples => examples}/vision/detection/yolov7/cpp/README.md (100%)
 rename {new_examples => examples}/vision/detection/yolov7/cpp/infer.cc (100%)
 rename {new_examples => examples}/vision/detection/yolov7/python/README.md (100%)
 rename {new_examples => examples}/vision/detection/yolov7/python/infer.py (100%)
 delete mode 100644 examples/vision/linzaer_ultraface.cc
 delete mode 100644 examples/vision/megvii_yolox.cc
 delete mode 100644 examples/vision/meituan_yolov6.cc
 delete mode 100644 examples/vision/ppdet_ppyoloe.cc
 delete mode 100644 examples/vision/ppogg_yolov5lite.cc
 delete mode 100644 examples/vision/ppseg_unet.cc
 delete mode 100644 examples/vision/rangilyu_nanodet_plus.cc
 delete mode 100644 examples/vision/ultralytics_yolov5.cc
 delete mode 100644 examples/vision/wongkinyiu_scaledyolov4.cc
 delete mode 100644 examples/vision/wongkinyiu_yolor.cc
 delete mode 100644 examples/vision/wongkinyiu_yolov7.cc
 delete mode 100644 examples/vision/zhkkke_modnet.cc
 delete mode 100644 model_zoo/.gitignore
 delete mode 100755 model_zoo/text/ernie-3.0/README.md
 delete mode 100755 model_zoo/text/ernie-3.0/ernie_predictor.py
 delete mode 100755 model_zoo/text/ernie-3.0/infer_cpu.py
 delete mode 100755 model_zoo/text/ernie-3.0/infer_gpu.py
 delete mode 100755 model_zoo/text/ernie-3.0/requirements_cpu.txt
 delete mode 100755 model_zoo/text/ernie-3.0/requirements_gpu.txt
 delete mode 100644 model_zoo/vision/arcface/README.md
 delete mode 100644 model_zoo/vision/arcface/api.md
 delete mode 100644 model_zoo/vision/arcface/arcface.py
 delete mode 100644 model_zoo/vision/arcface/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/arcface/cpp/README.md
 delete mode 100644 model_zoo/vision/arcface/cpp/arcface.cc
 delete mode 100644 model_zoo/vision/modnet/README.md
 delete mode 100644 model_zoo/vision/modnet/api.md
 delete mode 100644 model_zoo/vision/modnet/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/modnet/cpp/README.md
 delete mode 100644 model_zoo/vision/modnet/cpp/modnet.cc
 delete mode 100644 model_zoo/vision/modnet/modnet.py
 delete mode 100644 model_zoo/vision/nanodet_plus/README.md
 delete mode 100644 model_zoo/vision/nanodet_plus/api.md
 delete mode 100644 model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/nanodet_plus/cpp/README.md
 delete mode 100644 model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc
 delete mode 100644 model_zoo/vision/nanodet_plus/nanodet_plus.py
 delete mode 100644 model_zoo/vision/ppseg/ppseg_unet.py
 delete mode 100644 model_zoo/vision/retinaface/README.md
 delete mode 100644 model_zoo/vision/retinaface/api.md
 delete mode 100644 model_zoo/vision/retinaface/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/retinaface/cpp/README.md
 delete mode 100644 model_zoo/vision/retinaface/cpp/retinaface.cc
 delete mode 100644 model_zoo/vision/retinaface/retinaface.py
 delete mode 100644 model_zoo/vision/scaledyolov4/README.md
 delete mode 100644 model_zoo/vision/scaledyolov4/api.md
 delete mode 100644 model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/scaledyolov4/cpp/README.md
 delete mode 100644 model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc
 delete mode 100644 model_zoo/vision/scaledyolov4/scaled_yolov4.py
 delete mode 100644 model_zoo/vision/scrfd/README.md
 delete mode 100644 model_zoo/vision/scrfd/api.md
 delete mode 100644 model_zoo/vision/scrfd/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/scrfd/cpp/README.md
 delete mode 100644 model_zoo/vision/scrfd/cpp/scrfd.cc
 delete mode 100644 model_zoo/vision/scrfd/scrfd.py
 delete mode 100644 model_zoo/vision/ultraface/README.md
 delete mode 100644 model_zoo/vision/ultraface/api.md
 delete mode 100644 model_zoo/vision/ultraface/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/ultraface/cpp/README.md
 delete mode 100644 model_zoo/vision/ultraface/cpp/ultraface.cc
 delete mode 100644 model_zoo/vision/ultraface/ultraface.py
 delete mode 100644 model_zoo/vision/yolor/README.md
 delete mode 100644 model_zoo/vision/yolor/api.md
 delete mode 100644 model_zoo/vision/yolor/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/yolor/cpp/README.md
 delete mode 100644 model_zoo/vision/yolor/cpp/yolor.cc
 delete mode 100644 model_zoo/vision/yolor/yolor.py
 delete mode 100644 model_zoo/vision/yolov5/README.md
 delete mode 100644 model_zoo/vision/yolov5/api.md
 delete mode 100644 model_zoo/vision/yolov5/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/yolov5/cpp/README.md
 delete mode 100644 model_zoo/vision/yolov5/cpp/yolov5.cc
 delete mode 100644 model_zoo/vision/yolov5/yolov5.py
 delete mode 100644 model_zoo/vision/yolov5face/README.md
 delete mode 100644 model_zoo/vision/yolov5face/api.md
 delete mode 100644 model_zoo/vision/yolov5face/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/yolov5face/cpp/README.md
 delete mode 100644 model_zoo/vision/yolov5face/cpp/yolov5face.cc
 delete mode 100644 model_zoo/vision/yolov5face/yolov5face.py
 delete mode 100644 model_zoo/vision/yolov5lite/README.md
 delete mode 100644 model_zoo/vision/yolov5lite/api.md
 delete mode 100644 model_zoo/vision/yolov5lite/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/yolov5lite/cpp/README.md
 delete mode 100644 model_zoo/vision/yolov5lite/cpp/yolov5lite.cc
 delete mode 100644 model_zoo/vision/yolov5lite/yolov5lite.py
 delete mode 100644 model_zoo/vision/yolov6/README.md
 delete mode 100644 model_zoo/vision/yolov6/api.md
 delete mode 100644 model_zoo/vision/yolov6/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/yolov6/cpp/README.md
 delete mode 100644 model_zoo/vision/yolov6/cpp/yolov6.cc
 delete mode 100644 model_zoo/vision/yolov6/yolov6.py
 delete mode 100644 model_zoo/vision/yolov7/README.md
 delete mode 100644 model_zoo/vision/yolov7/api.md
 delete mode 100644 model_zoo/vision/yolov7/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/yolov7/cpp/README.md
 delete mode 100644 model_zoo/vision/yolov7/cpp/yolov7.cc
 delete mode 100644 model_zoo/vision/yolov7/yolov7.py
 delete mode 100644 model_zoo/vision/yolox/README.md
 delete mode 100644 model_zoo/vision/yolox/api.md
 delete mode 100644 model_zoo/vision/yolox/cpp/CMakeLists.txt
 delete mode 100644 model_zoo/vision/yolox/cpp/README.md
 delete mode 100644 model_zoo/vision/yolox/cpp/yolox.cc
 delete mode 100644 model_zoo/vision/yolox/yolox.py
 delete mode 100644 sdk_mannager/fastdeploy/__init__.py
 delete mode 100644 sdk_mannager/fastdeploy/__main__.py
 delete mode 100644 sdk_mannager/fastdeploy/download.py
 delete mode 100644 sdk_mannager/requirements.txt
 delete mode 100644 sdk_mannager/setup.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c62522c8..7e2621f6c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@ option(CSRCS_DIR_NAME "Name of source code directory")
 option(LIBRARY_NAME "Name of build library name")
 option(PY_LIBRARY_NAME "Name of build python library name")
 if(NOT CSRCS_DIR_NAME)
-  set(CSRCS_DIR_NAME "csrcs")
+  set(CSRCS_DIR_NAME "csrc")
 endif()
 if(NOT LIBRARY_NAME)
   set(LIBRARY_NAME "fastdeploy")
@@ -55,10 +55,6 @@ option(ENABLE_FDTENSOR_FUNC "Whether to compile with function of FDTensor." OFF)
 option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
 option(ENABLE_DEBUG "Whether to enable print debug information, this may reduce performance." OFF)
 
-# Whether to build fastdeply with vision/text/... examples, only for testings.
-option(WITH_VISION_EXAMPLES "Whether to build fastdeply with vision examples" OFF)
-option(WITH_TEXT_EXAMPLES "Whether to build fastdeply with text examples" OFF)
-
 # config GIT_URL with github mirrors to speed up dependent repos clone
 option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL})
 if(NOT GIT_URL)
@@ -102,19 +98,6 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
 include_directories(${HEAD_DIR})
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
-if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
-  # ENABLE_VISION and ENABLE_VISION_VISUALIZE must be ON if enable vision examples.
-  message(STATUS "Found WTIH_VISION_EXAMPLES ON, so, force ENABLE_VISION and ENABLE_VISION_VISUALIZE ON")
-  set(ENABLE_VISION ON CACHE BOOL "force to enable vision models usage" FORCE)
-  set(ENABLE_VISION_VISUALIZE ON CACHE BOOL "force to enable visualize vision model result toolbox" FORCE)
-endif()
-
-if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
-  # ENABLE_TEXT must be ON if enable text examples.
-  message(STATUS "Found WITH_TEXT_EXAMPLES ON, so, force ENABLE_TEXT ON")
-  set(ENABLE_TEXT ON CACHE BOOL "force to enable text models usage" FORCE)
-endif()
-
 add_definitions(-DFASTDEPLOY_LIB)
 file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
 file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
@@ -277,22 +260,6 @@ if(MSVC)
 endif()
 target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS})
 
-# add examples after prepare include paths for third-parties
-if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
-  add_definitions(-DWITH_VISION_EXAMPLES)
-  set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin)
-  add_subdirectory(examples)
-endif()
-
-if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
-  add_definitions(-DWITH_TEXT_EXAMPLES)
-  set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin)
-  # Avoid to add_subdirectory repeatedly
-  if (NOT WITH_VISION_EXAMPLES)
-    add_subdirectory(examples)
-  endif()
-endif()
-
 if (WITH_TESTING AND EXISTS ${PROJECT_SOURCE_DIR}/tests)
   add_definitions(-DWITH_TESTING)
   include(external/gtest.cmake)
diff --git a/csrcs/fastdeploy/CMakeLists.txt b/csrcs/fastdeploy/CMakeLists.txt
deleted file mode 100644
index e69de29bb..000000000
diff --git a/csrcs/fastdeploy/backends/backend.h b/csrcs/fastdeploy/backends/backend.h
deleted file mode 100644
index de7b5a575..000000000
--- a/csrcs/fastdeploy/backends/backend.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <iostream>
-#include <memory>
-#include <string>
-#include <vector>
-#include "fastdeploy/backends/common/multiclass_nms.h"
-#include "fastdeploy/core/fd_tensor.h"
-
-namespace fastdeploy {
-
-struct TensorInfo {
-  std::string name;
-  std::vector<int> shape;
-  FDDataType dtype;
-};
-
-class BaseBackend {
- public:
-  bool initialized_ = false;
-
-  BaseBackend() {}
-  virtual ~BaseBackend() = default;
-
-  virtual bool Initialized() const { return initialized_; }
-
-  virtual int NumInputs() const = 0;
-  virtual int NumOutputs() const = 0;
-  virtual TensorInfo GetInputInfo(int index) = 0;
-  virtual TensorInfo GetOutputInfo(int index) = 0;
-  virtual bool Infer(std::vector<FDTensor>& inputs,
-                     std::vector<FDTensor>* outputs) = 0;
-};
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/common/multiclass_nms.cc b/csrcs/fastdeploy/backends/common/multiclass_nms.cc
deleted file mode 100644
index c3d65ec7d..000000000
--- a/csrcs/fastdeploy/backends/common/multiclass_nms.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/backends/common/multiclass_nms.h"
-#include <algorithm>
-#include "fastdeploy/core/fd_tensor.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-namespace backend {
-template <class T>
-bool SortScorePairDescend(const std::pair<float, T>& pair1,
-                          const std::pair<float, T>& pair2) {
-  return pair1.first > pair2.first;
-}
-
-void GetMaxScoreIndex(const float* scores, const int& score_size,
-                      const float& threshold, const int& top_k,
-                      std::vector<std::pair<float, int>>* sorted_indices) {
-  for (size_t i = 0; i < score_size; ++i) {
-    if (scores[i] > threshold) {
-      sorted_indices->push_back(std::make_pair(scores[i], i));
-    }
-  }
-  // Sort the score pair according to the scores in descending order
-  std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
-                   SortScorePairDescend<int>);
-  // Keep top_k scores if needed.
-  if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
-    sorted_indices->resize(top_k);
-  }
-}
-
-float BBoxArea(const float* box, const bool& normalized) {
-  if (box[2] < box[0] || box[3] < box[1]) {
-    // If coordinate values are is invalid
-    // (e.g. xmax < xmin or ymax < ymin), return 0.
-    return 0.f;
-  } else {
-    const float w = box[2] - box[0];
-    const float h = box[3] - box[1];
-    if (normalized) {
-      return w * h;
-    } else {
-      // If coordinate values are not within range [0, 1].
-      return (w + 1) * (h + 1);
-    }
-  }
-}
-
-float JaccardOverlap(const float* box1, const float* box2,
-                     const bool& normalized) {
-  if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
-      box2[3] < box1[1]) {
-    return 0.f;
-  } else {
-    const float inter_xmin = std::max(box1[0], box2[0]);
-    const float inter_ymin = std::max(box1[1], box2[1]);
-    const float inter_xmax = std::min(box1[2], box2[2]);
-    const float inter_ymax = std::min(box1[3], box2[3]);
-    float norm = normalized ? 0.0f : 1.0f;
-    float inter_w = inter_xmax - inter_xmin + norm;
-    float inter_h = inter_ymax - inter_ymin + norm;
-    const float inter_area = inter_w * inter_h;
-    const float bbox1_area = BBoxArea(box1, normalized);
-    const float bbox2_area = BBoxArea(box2, normalized);
-    return inter_area / (bbox1_area + bbox2_area - inter_area);
-  }
-}
-
-void MultiClassNMS::FastNMS(const float* boxes, const float* scores,
-                            const int& num_boxes,
-                            std::vector<int>* keep_indices) {
-  std::vector<std::pair<float, int>> sorted_indices;
-  GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
-                   &sorted_indices);
-
-  float adaptive_threshold = nms_threshold;
-  while (sorted_indices.size() != 0) {
-    const int idx = sorted_indices.front().second;
-    bool keep = true;
-    for (size_t k = 0; k < keep_indices->size(); ++k) {
-      if (!keep) {
-        break;
-      }
-      const int kept_idx = (*keep_indices)[k];
-      float overlap =
-          JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
-      keep = overlap <= adaptive_threshold;
-    }
-    if (keep) {
-      keep_indices->push_back(idx);
-    }
-    sorted_indices.erase(sorted_indices.begin());
-    if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) {
-      adaptive_threshold *= nms_eta;
-    }
-  }
-}
-
-int MultiClassNMS::NMSForEachSample(
-    const float* boxes, const float* scores, int num_boxes, int num_classes,
-    std::map<int, std::vector<int>>* keep_indices) {
-  for (int i = 0; i < num_classes; ++i) {
-    if (i == background_label) {
-      continue;
-    }
-    const float* score_for_class_i = scores + i * num_boxes;
-    FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i]));
-  }
-  int num_det = 0;
-  for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) {
-    num_det += iter->second.size();
-  }
-
-  if (keep_top_k > -1 && num_det > keep_top_k) {
-    std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
-    for (const auto& it : *keep_indices) {
-      int label = it.first;
-      const float* current_score = scores + label * num_boxes;
-      auto& label_indices = it.second;
-      for (size_t j = 0; j < label_indices.size(); ++j) {
-        int idx = label_indices[j];
-        score_index_pairs.push_back(
-            std::make_pair(current_score[idx], std::make_pair(label, idx)));
-      }
-    }
-    std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
-                     SortScorePairDescend<std::pair<int, int>>);
-    score_index_pairs.resize(keep_top_k);
-
-    std::map<int, std::vector<int>> new_indices;
-    for (size_t j = 0; j < score_index_pairs.size(); ++j) {
-      int label = score_index_pairs[j].second.first;
-      int idx = score_index_pairs[j].second.second;
-      new_indices[label].push_back(idx);
-    }
-    new_indices.swap(*keep_indices);
-    num_det = keep_top_k;
-  }
-  return num_det;
-}
-
-void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data,
-                            const std::vector<int64_t>& boxes_dim,
-                            const std::vector<int64_t>& scores_dim) {
-  int score_size = scores_dim.size();
-
-  int64_t batch_size = scores_dim[0];
-  int64_t box_dim = boxes_dim[2];
-  int64_t out_dim = box_dim + 2;
-
-  int num_nmsed_out = 0;
-  FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
-                                std::to_string(score_size) + ".");
-  FDASSERT(boxes_dim[2] == 4,
-           "Require the 3-dimension of input boxes be 4, but now it's " +
-               std::to_string(boxes_dim[2]) + ".");
-  out_num_rois_data.resize(batch_size);
-
-  std::vector<std::map<int, std::vector<int>>> all_indices;
-  for (size_t i = 0; i < batch_size; ++i) {
-    std::map<int, std::vector<int>> indices;  // indices kept for each class
-    const float* current_boxes_ptr =
-        boxes_data + i * boxes_dim[1] * boxes_dim[2];
-    const float* current_scores_ptr =
-        scores_data + i * scores_dim[1] * scores_dim[2];
-    int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr,
-                               boxes_dim[1], scores_dim[1], &indices);
-    num_nmsed_out += num;
-    out_num_rois_data[i] = num;
-    all_indices.emplace_back(indices);
-  }
-  std::vector<int64_t> out_box_dims = {num_nmsed_out, 6};
-  std::vector<int64_t> out_index_dims = {num_nmsed_out, 1};
-  if (num_nmsed_out == 0) {
-    for (size_t i = 0; i < batch_size; ++i) {
-      out_num_rois_data[i] = 0;
-    }
-    return;
-  }
-  out_box_data.resize(num_nmsed_out * 6);
-  out_index_data.resize(num_nmsed_out);
-
-  int count = 0;
-  for (size_t i = 0; i < batch_size; ++i) {
-    const float* current_boxes_ptr =
-        boxes_data + i * boxes_dim[1] * boxes_dim[2];
-    const float* current_scores_ptr =
-        scores_data + i * scores_dim[1] * scores_dim[2];
-    for (const auto& it : all_indices[i]) {
-      int label = it.first;
-      const auto& indices = it.second;
-      const float* current_scores_class_ptr =
-          current_scores_ptr + label * scores_dim[2];
-      for (size_t j = 0; j < indices.size(); ++j) {
-        int start = count * 6;
-        out_box_data[start] = label;
-        out_box_data[start + 1] = current_scores_class_ptr[indices[j]];
-
-        out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4];
-        out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1];
-        out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2];
-
-        out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3];
-        out_index_data[count] = i * boxes_dim[1] + indices[j];
-        count += 1;
-      }
-    }
-  }
-}
-}  // namespace backend
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/common/multiclass_nms.h b/csrcs/fastdeploy/backends/common/multiclass_nms.h
deleted file mode 100644
index 48a3d9336..000000000
--- a/csrcs/fastdeploy/backends/common/multiclass_nms.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <map>
-#include <string>
-#include <vector>
-
-namespace fastdeploy {
-namespace backend {
-struct MultiClassNMS {
-  int64_t background_label = -1;
-  int64_t keep_top_k = -1;
-  float nms_eta;
-  float nms_threshold = 0.7;
-  int64_t nms_top_k;
-  bool normalized;
-  float score_threshold;
-
-  std::vector<int32_t> out_num_rois_data;
-  std::vector<int32_t> out_index_data;
-  std::vector<float> out_box_data;
-  void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
-               std::vector<int>* keep_indices);
-  int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
-                       int num_classes,
-                       std::map<int, std::vector<int>>* keep_indices);
-  void Compute(const float* boxes, const float* scores,
-               const std::vector<int64_t>& boxes_dim,
-               const std::vector<int64_t>& scores_dim);
-};
-}  // namespace backend
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc b/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc
deleted file mode 100644
index a132dbffc..000000000
--- a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc
+++ /dev/null
@@ -1,261 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef NON_64_PLATFORM
-
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
-#include <algorithm>
-#include "fastdeploy/core/fd_tensor.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-
-struct OrtTensorDimensions : std::vector<int64_t> {
-  OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
-    OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
-    std::vector<int64_t>::operator=(ort.GetTensorShape(info));
-    ort.ReleaseTensorTypeAndShapeInfo(info);
-  }
-};
-
-template <class T>
-bool SortScorePairDescend(const std::pair<float, T>& pair1,
-                          const std::pair<float, T>& pair2) {
-  return pair1.first > pair2.first;
-}
-
-void GetMaxScoreIndex(const float* scores, const int& score_size,
-                      const float& threshold, const int& top_k,
-                      std::vector<std::pair<float, int>>* sorted_indices) {
-  for (size_t i = 0; i < score_size; ++i) {
-    if (scores[i] > threshold) {
-      sorted_indices->push_back(std::make_pair(scores[i], i));
-    }
-  }
-  // Sort the score pair according to the scores in descending order
-  std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
-                   SortScorePairDescend<int>);
-  // Keep top_k scores if needed.
-  if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
-    sorted_indices->resize(top_k);
-  }
-}
-
-float BBoxArea(const float* box, const bool& normalized) {
-  if (box[2] < box[0] || box[3] < box[1]) {
-    // If coordinate values are is invalid
-    // (e.g. xmax < xmin or ymax < ymin), return 0.
-    return 0.f;
-  } else {
-    const float w = box[2] - box[0];
-    const float h = box[3] - box[1];
-    if (normalized) {
-      return w * h;
-    } else {
-      // If coordinate values are not within range [0, 1].
-      return (w + 1) * (h + 1);
-    }
-  }
-}
-
-float JaccardOverlap(const float* box1, const float* box2,
-                     const bool& normalized) {
-  if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
-      box2[3] < box1[1]) {
-    return 0.f;
-  } else {
-    const float inter_xmin = std::max(box1[0], box2[0]);
-    const float inter_ymin = std::max(box1[1], box2[1]);
-    const float inter_xmax = std::min(box1[2], box2[2]);
-    const float inter_ymax = std::min(box1[3], box2[3]);
-    float norm = normalized ? 0.0f : 1.0f;
-    float inter_w = inter_xmax - inter_xmin + norm;
-    float inter_h = inter_ymax - inter_ymin + norm;
-    const float inter_area = inter_w * inter_h;
-    const float bbox1_area = BBoxArea(box1, normalized);
-    const float bbox2_area = BBoxArea(box2, normalized);
-    return inter_area / (bbox1_area + bbox2_area - inter_area);
-  }
-}
-
-void MultiClassNmsKernel::FastNMS(const float* boxes, const float* scores,
-                                  const int& num_boxes,
-                                  std::vector<int>* keep_indices) {
-  std::vector<std::pair<float, int>> sorted_indices;
-  GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
-                   &sorted_indices);
-
-  float adaptive_threshold = nms_threshold;
-  while (sorted_indices.size() != 0) {
-    const int idx = sorted_indices.front().second;
-    bool keep = true;
-    for (size_t k = 0; k < keep_indices->size(); ++k) {
-      if (!keep) {
-        break;
-      }
-      const int kept_idx = (*keep_indices)[k];
-      float overlap =
-          JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
-      keep = overlap <= adaptive_threshold;
-    }
-    if (keep) {
-      keep_indices->push_back(idx);
-    }
-    sorted_indices.erase(sorted_indices.begin());
-    if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) {
-      adaptive_threshold *= nms_eta;
-    }
-  }
-}
-
-int MultiClassNmsKernel::NMSForEachSample(
-    const float* boxes, const float* scores, int num_boxes, int num_classes,
-    std::map<int, std::vector<int>>* keep_indices) {
-  for (int i = 0; i < num_classes; ++i) {
-    if (i == background_label) {
-      continue;
-    }
-    const float* score_for_class_i = scores + i * num_boxes;
-    FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i]));
-  }
-  int num_det = 0;
-  for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) {
-    num_det += iter->second.size();
-  }
-
-  if (keep_top_k > -1 && num_det > keep_top_k) {
-    std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
-    for (const auto& it : *keep_indices) {
-      int label = it.first;
-      const float* current_score = scores + label * num_boxes;
-      auto& label_indices = it.second;
-      for (size_t j = 0; j < label_indices.size(); ++j) {
-        int idx = label_indices[j];
-        score_index_pairs.push_back(
-            std::make_pair(current_score[idx], std::make_pair(label, idx)));
-      }
-    }
-    std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
-                     SortScorePairDescend<std::pair<int, int>>);
-    score_index_pairs.resize(keep_top_k);
-
-    std::map<int, std::vector<int>> new_indices;
-    for (size_t j = 0; j < score_index_pairs.size(); ++j) {
-      int label = score_index_pairs[j].second.first;
-      int idx = score_index_pairs[j].second.second;
-      new_indices[label].push_back(idx);
-    }
-    new_indices.swap(*keep_indices);
-    num_det = keep_top_k;
-  }
-  return num_det;
-}
-
-void MultiClassNmsKernel::Compute(OrtKernelContext* context) {
-  const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
-  const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
-  const float* boxes_data =
-      reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
-  const float* scores_data =
-      reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
-  OrtTensorDimensions boxes_dim(ort_, boxes);
-  OrtTensorDimensions scores_dim(ort_, scores);
-  int score_size = scores_dim.size();
-
-  int64_t batch_size = scores_dim[0];
-  int64_t box_dim = boxes_dim[2];
-  int64_t out_dim = box_dim + 2;
-
-  int num_nmsed_out = 0;
-  FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
-                                std::to_string(score_size) + ".");
-  FDASSERT(boxes_dim[2] == 4,
-           "Require the 3-dimension of input boxes be 4, but now it's " +
-               std::to_string(boxes_dim[2]) + ".");
-  std::vector<int64_t> out_num_rois_dims = {batch_size};
-  OrtValue* out_num_rois = ort_.KernelContext_GetOutput(
-      context, 2, out_num_rois_dims.data(), out_num_rois_dims.size());
-  int32_t* out_num_rois_data = ort_.GetTensorMutableData<int32_t>(out_num_rois);
-
-  std::vector<std::map<int, std::vector<int>>> all_indices;
-  for (size_t i = 0; i < batch_size; ++i) {
-    std::map<int, std::vector<int>> indices;  // indices kept for each class
-    const float* current_boxes_ptr =
-        boxes_data + i * boxes_dim[1] * boxes_dim[2];
-    const float* current_scores_ptr =
-        scores_data + i * scores_dim[1] * scores_dim[2];
-    int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr,
-                               boxes_dim[1], scores_dim[1], &indices);
-    num_nmsed_out += num;
-    out_num_rois_data[i] = num;
-    all_indices.emplace_back(indices);
-  }
-  std::vector<int64_t> out_box_dims = {num_nmsed_out, 6};
-  std::vector<int64_t> out_index_dims = {num_nmsed_out, 1};
-  OrtValue* out_box = ort_.KernelContext_GetOutput(
-      context, 0, out_box_dims.data(), out_box_dims.size());
-  OrtValue* out_index = ort_.KernelContext_GetOutput(
-      context, 1, out_index_dims.data(), out_index_dims.size());
-  if (num_nmsed_out == 0) {
-    int32_t* out_num_rois_data =
-        ort_.GetTensorMutableData<int32_t>(out_num_rois);
-    for (size_t i = 0; i < batch_size; ++i) {
-      out_num_rois_data[i] = 0;
-    }
-    return;
-  }
-  float* out_box_data = ort_.GetTensorMutableData<float>(out_box);
-  int32_t* out_index_data = ort_.GetTensorMutableData<int32_t>(out_index);
-
-  int count = 0;
-  for (size_t i = 0; i < batch_size; ++i) {
-    const float* current_boxes_ptr =
-        boxes_data + i * boxes_dim[1] * boxes_dim[2];
-    const float* current_scores_ptr =
-        scores_data + i * scores_dim[1] * scores_dim[2];
-    for (const auto& it : all_indices[i]) {
-      int label = it.first;
-      const auto& indices = it.second;
-      const float* current_scores_class_ptr =
-          current_scores_ptr + label * scores_dim[2];
-      for (size_t j = 0; j < indices.size(); ++j) {
-        int start = count * 6;
-        out_box_data[start] = label;
-        out_box_data[start + 1] = current_scores_class_ptr[indices[j]];
-
-        out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4];
-        out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1];
-        out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2];
-
-        out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3];
-        out_index_data[count] = i * boxes_dim[1] + indices[j];
-        count += 1;
-      }
-    }
-  }
-}
-
-void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo* info) {
-  background_label =
-      ort_.KernelInfoGetAttribute<int64_t>(info, "background_label");
-  keep_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "keep_top_k");
-  nms_eta = ort_.KernelInfoGetAttribute<float>(info, "nms_eta");
-  nms_threshold = ort_.KernelInfoGetAttribute<float>(info, "nms_threshold");
-  nms_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "nms_top_k");
-  normalized = ort_.KernelInfoGetAttribute<int64_t>(info, "normalized");
-  score_threshold = ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
-}
-}  // namespace fastdeploy
-
-#endif
\ No newline at end of file
diff --git a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h b/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h
deleted file mode 100644
index 4e167d669..000000000
--- a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <map>
-
-#ifndef NON_64_PLATFORM
-#include "onnxruntime_cxx_api.h"  // NOLINT
-
-namespace fastdeploy {
-
-struct MultiClassNmsKernel {
- protected:
-  int64_t background_label = -1;
-  int64_t keep_top_k = -1;
-  float nms_eta;
-  float nms_threshold = 0.7;
-  int64_t nms_top_k;
-  bool normalized;
-  float score_threshold;
-  Ort::CustomOpApi ort_;
-
- public:
-  MultiClassNmsKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
-      : ort_(ort) {
-    GetAttribute(info);
-  }
-
-  void GetAttribute(const OrtKernelInfo* info);
-
-  void Compute(OrtKernelContext* context);
-  void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
-               std::vector<int>* keep_indices);
-  int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
-                       int num_classes,
-                       std::map<int, std::vector<int>>* keep_indices);
-};
-
-struct MultiClassNmsOp
-    : Ort::CustomOpBase<MultiClassNmsOp, MultiClassNmsKernel> {
-  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
-    return new MultiClassNmsKernel(api, info);
-  }
-
-  const char* GetName() const { return "MultiClassNMS"; }
-
-  size_t GetInputTypeCount() const { return 2; }
-
-  ONNXTensorElementDataType GetInputType(size_t index) const {
-    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
-  }
-
-  size_t GetOutputTypeCount() const { return 3; }
-
-  ONNXTensorElementDataType GetOutputType(size_t index) const {
-    if (index == 0) {
-      return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
-    }
-    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
-  }
-
-  const char* GetExecutionProviderType() const {
-    return "CPUExecutionProvider";
-  }
-};
-
-}  // namespace fastdeploy
-
-#endif
\ No newline at end of file
diff --git a/csrcs/fastdeploy/backends/ort/ort_backend.cc b/csrcs/fastdeploy/backends/ort/ort_backend.cc
deleted file mode 100644
index c17890109..000000000
--- a/csrcs/fastdeploy/backends/ort/ort_backend.cc
+++ /dev/null
@@ -1,279 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/backends/ort/ort_backend.h"
-#include <memory>
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
-#include "fastdeploy/backends/ort/utils.h"
-#include "fastdeploy/utils/utils.h"
-#ifdef ENABLE_PADDLE_FRONTEND
-#include "paddle2onnx/converter.h"
-#endif
-
-namespace fastdeploy {
-
-std::vector<OrtCustomOp*> OrtBackend::custom_operators_ =
-    std::vector<OrtCustomOp*>();
-
-void OrtBackend::BuildOption(const OrtBackendOption& option) {
-  option_ = option;
-  if (option.graph_optimization_level >= 0) {
-    session_options_.SetGraphOptimizationLevel(
-        GraphOptimizationLevel(option.graph_optimization_level));
-  }
-  if (option.intra_op_num_threads >= 0) {
-    session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
-  }
-  if (option.inter_op_num_threads >= 0) {
-    session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
-  }
-  if (option.execution_mode >= 0) {
-    session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
-  }
-  if (option.use_gpu) {
-    auto all_providers = Ort::GetAvailableProviders();
-    bool support_cuda = false;
-    std::string providers_msg = "";
-    for (size_t i = 0; i < all_providers.size(); ++i) {
-      providers_msg = providers_msg + all_providers[i] + ", ";
-      if (all_providers[i] == "CUDAExecutionProvider") {
-        support_cuda = true;
-      }
-    }
-    if (!support_cuda) {
-      FDWARNING << "Compiled fastdeploy with onnxruntime doesn't "
-                   "support GPU, the available providers are "
-                << providers_msg << "will fallback to CPUExecutionProvider."
-                << std::endl;
-      option_.use_gpu = false;
-    } else {
-      FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " +
-                                       std::to_string(option.gpu_id) + ".");
-      OrtCUDAProviderOptions cuda_options;
-      cuda_options.device_id = option.gpu_id;
-      session_options_.AppendExecutionProvider_CUDA(cuda_options);
-    }
-  }
-}
-
-bool OrtBackend::InitFromPaddle(const std::string& model_file,
-                                const std::string& params_file,
-                                const OrtBackendOption& option, bool verbose) {
-  if (initialized_) {
-    FDERROR << "OrtBackend is already initlized, cannot initialize again."
-            << std::endl;
-    return false;
-  }
-#ifdef ENABLE_PADDLE_FRONTEND
-  char* model_content_ptr;
-  int model_content_size = 0;
-
-  std::vector<paddle2onnx::CustomOp> custom_ops;
-  for (auto& item : option.custom_op_info_) {
-    paddle2onnx::CustomOp op;
-    strcpy(op.op_name, item.first.c_str());
-    strcpy(op.export_op_name, item.second.c_str());
-    custom_ops.emplace_back(op);
-  }
-  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
-                           &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, custom_ops.data(),
-                           custom_ops.size())) {
-    FDERROR << "Error occured while export PaddlePaddle to ONNX format."
-            << std::endl;
-    return false;
-  }
-
-  std::string onnx_model_proto(model_content_ptr,
-                               model_content_ptr + model_content_size);
-  delete[] model_content_ptr;
-  model_content_ptr = nullptr;
-  return InitFromOnnx(onnx_model_proto, option, true);
-#else
-  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
-             "call `InitFromOnnx` instead."
-          << std::endl;
-#endif
-  return false;
-}
-
-bool OrtBackend::InitFromOnnx(const std::string& model_file,
-                              const OrtBackendOption& option,
-                              bool from_memory_buffer) {
-  if (initialized_) {
-    FDERROR << "OrtBackend is already initlized, cannot initialize again."
-            << std::endl;
-    return false;
-  }
-
-  BuildOption(option);
-  InitCustomOperators();
-  if (from_memory_buffer) {
-    session_ = {env_, model_file.data(), model_file.size(), session_options_};
-  } else {
-#ifdef _WIN32
-    session_ = {env_,
-                std::wstring(model_file.begin(), model_file.end()).c_str(),
-                session_options_};
-#else
-    session_ = {env_, model_file.c_str(), session_options_};
-#endif
-  }
-  binding_ = std::make_shared<Ort::IoBinding>(session_);
-
-  Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
-  Ort::Allocator allocator(session_, memory_info);
-  size_t n_inputs = session_.GetInputCount();
-  for (size_t i = 0; i < n_inputs; ++i) {
-    auto input_name = session_.GetInputName(i, allocator);
-    auto type_info = session_.GetInputTypeInfo(i);
-    std::vector<int64_t> shape =
-        type_info.GetTensorTypeAndShapeInfo().GetShape();
-    ONNXTensorElementDataType data_type =
-        type_info.GetTensorTypeAndShapeInfo().GetElementType();
-    inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type});
-    allocator.Free(input_name);
-  }
-
-  size_t n_outputs = session_.GetOutputCount();
-  for (size_t i = 0; i < n_outputs; ++i) {
-    auto output_name = session_.GetOutputName(i, allocator);
-    auto type_info = session_.GetOutputTypeInfo(i);
-    std::vector<int64_t> shape =
-        type_info.GetTensorTypeAndShapeInfo().GetShape();
-    ONNXTensorElementDataType data_type =
-        type_info.GetTensorTypeAndShapeInfo().GetElementType();
-    outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type});
-
-    Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0,
-                                    OrtMemTypeDefault);
-    binding_->BindOutput(output_name, out_memory_info);
-
-    allocator.Free(output_name);
-  }
-  initialized_ = true;
-  return true;
-}
-
-void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
-  const auto info = value.GetTensorTypeAndShapeInfo();
-  const auto data_type = info.GetElementType();
-  size_t numel = info.GetElementCount();
-  tensor->shape = info.GetShape();
-
-  if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
-    tensor->data.resize(numel * sizeof(float));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
-           numel * sizeof(float));
-    tensor->dtype = FDDataType::FP32;
-  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
-    tensor->data.resize(numel * sizeof(int32_t));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
-           numel * sizeof(int32_t));
-    tensor->dtype = FDDataType::INT32;
-  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
-    tensor->data.resize(numel * sizeof(int64_t));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
-           numel * sizeof(int64_t));
-    tensor->dtype = FDDataType::INT64;
-  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
-    tensor->data.resize(numel * sizeof(double));
-    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
-           numel * sizeof(double));
-    tensor->dtype = FDDataType::FP64;
-  } else {
-    FDASSERT(false, "Unrecognized data type of " + std::to_string(data_type) +
-                        " while calling OrtBackend::CopyToCpu().");
-  }
-}
-
-bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
-                       std::vector<FDTensor>* outputs) {
-  if (inputs.size() != inputs_desc_.size()) {
-    FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
-            << ") should keep same with the inputs of this model("
-            << inputs_desc_.size() << ")." << std::endl;
-    return false;
-  }
-
-  // from FDTensor to Ort Inputs
-  for (size_t i = 0; i < inputs.size(); ++i) {
-    auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
-    binding_->BindInput(inputs[i].name.c_str(), ort_value);
-  }
-
-  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0,
-                                OrtMemTypeDefault);
-    binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info);
-  }
-
-  // Inference with inputs
-  try {
-    session_.Run({}, *(binding_.get()));
-  } catch (const std::exception& e) {
-    FDERROR << "Failed to Infer: " << e.what() << std::endl;
-    return false;
-  }
-
-  // Copy result after inference
-  std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
-  outputs->resize(ort_outputs.size());
-  for (size_t i = 0; i < ort_outputs.size(); ++i) {
-    (*outputs)[i].name = outputs_desc_[i].name;
-    CopyToCpu(ort_outputs[i], &((*outputs)[i]));
-  }
-
-  return true;
-}
-
-TensorInfo OrtBackend::GetInputInfo(int index) {
-  FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
-                                    " should less than the number of inputs:" +
-                                    std::to_string(NumInputs()) + ".");
-  TensorInfo info;
-  info.name = inputs_desc_[index].name;
-  info.shape.assign(inputs_desc_[index].shape.begin(),
-                    inputs_desc_[index].shape.end());
-  info.dtype = GetFdDtype(inputs_desc_[index].dtype);
-  return info;
-}
-
-TensorInfo OrtBackend::GetOutputInfo(int index) {
-  FDASSERT(index < NumOutputs(),
-           "The index:" + std::to_string(index) +
-               " should less than the number of outputs:" +
-               std::to_string(NumOutputs()) + ".");
-  TensorInfo info;
-  info.name = outputs_desc_[index].name;
-  info.shape.assign(outputs_desc_[index].shape.begin(),
-                    outputs_desc_[index].shape.end());
-  info.dtype = GetFdDtype(outputs_desc_[index].dtype);
-  return info;
-}
-
-void OrtBackend::InitCustomOperators() {
-#ifndef NON_64_PLATFORM
-  if (custom_operators_.size() == 0) {
-    MultiClassNmsOp* custom_op = new MultiClassNmsOp{};
-    custom_operators_.push_back(custom_op);
-  }
-  for (size_t i = 0; i < custom_operators_.size(); ++i) {
-    custom_op_domain_.Add(custom_operators_[i]);
-  }
-  session_options_.Add(custom_op_domain_);
-#endif
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/ort/ort_backend.h b/csrcs/fastdeploy/backends/ort/ort_backend.h
deleted file mode 100644
index 5070934c6..000000000
--- a/csrcs/fastdeploy/backends/ort/ort_backend.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <iostream>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "fastdeploy/backends/backend.h"
-#include "onnxruntime_cxx_api.h"  // NOLINT
-
-namespace fastdeploy {
-
-struct OrtValueInfo {
-  std::string name;
-  std::vector<int64_t> shape;
-  ONNXTensorElementDataType dtype;
-};
-
-struct OrtBackendOption {
-  // -1 means default
-  // 0: ORT_DISABLE_ALL
-  // 1: ORT_ENABLE_BASIC
-  // 2: ORT_ENABLE_EXTENDED
-  // 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert)
-  int graph_optimization_level = -1;
-  int intra_op_num_threads = -1;
-  int inter_op_num_threads = -1;
-  // 0: ORT_SEQUENTIAL
-  // 1: ORT_PARALLEL
-  int execution_mode = -1;
-  bool use_gpu = false;
-  int gpu_id = 0;
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
-};
-
-class OrtBackend : public BaseBackend {
- public:
-  OrtBackend() {}
-  virtual ~OrtBackend() = default;
-
-  void BuildOption(const OrtBackendOption& option);
-
-  bool InitFromPaddle(const std::string& model_file,
-                      const std::string& params_file,
-                      const OrtBackendOption& option = OrtBackendOption(),
-                      bool verbose = false);
-
-  bool InitFromOnnx(const std::string& model_file,
-                    const OrtBackendOption& option = OrtBackendOption(),
-                    bool from_memory_buffer = false);
-
-  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
-
-  int NumInputs() const { return inputs_desc_.size(); }
-
-  int NumOutputs() const { return outputs_desc_.size(); }
-
-  TensorInfo GetInputInfo(int index);
-  TensorInfo GetOutputInfo(int index);
-  static std::vector<OrtCustomOp*> custom_operators_;
-  void InitCustomOperators();
-
- private:
-  Ort::Env env_;
-  Ort::Session session_{nullptr};
-  Ort::SessionOptions session_options_;
-  std::shared_ptr<Ort::IoBinding> binding_;
-  std::vector<OrtValueInfo> inputs_desc_;
-  std::vector<OrtValueInfo> outputs_desc_;
-#ifndef NON_64_PLATFORM
-  Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
-#endif
-  OrtBackendOption option_;
-  void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
-};
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/ort/utils.cc b/csrcs/fastdeploy/backends/ort/utils.cc
deleted file mode 100644
index ae3e45b86..000000000
--- a/csrcs/fastdeploy/backends/ort/utils.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/backends/ort/utils.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-
-ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype) {
-  if (fd_dtype == FDDataType::FP32) {
-    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
-  } else if (fd_dtype == FDDataType::FP64) {
-    return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
-  } else if (fd_dtype == FDDataType::INT32) {
-    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
-  } else if (fd_dtype == FDDataType::INT64) {
-    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
-  }
-  FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "."
-          << std::endl;
-  return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
-}
-
-FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype) {
-  if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
-    return FDDataType::FP32;
-  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
-    return FDDataType::FP64;
-  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
-    return FDDataType::INT32;
-  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
-    return FDDataType::INT64;
-  }
-  FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl;
-  return FDDataType::FP32;
-}
-
-Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda) {
-  FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
-           "Only support tensor which device is CPU or GPU for OrtBackend.");
-  if (tensor.device == Device::GPU && is_backend_cuda) {
-    Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0,
-                                OrtMemTypeDefault);
-    auto ort_value = Ort::Value::CreateTensor(
-        memory_info, tensor.MutableData(), tensor.Nbytes(), tensor.shape.data(),
-        tensor.shape.size(), GetOrtDtype(tensor.dtype));
-    return ort_value;
-  }
-  Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
-  auto ort_value = Ort::Value::CreateTensor(
-      memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(),
-      tensor.shape.size(), GetOrtDtype(tensor.dtype));
-  return ort_value;
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/ort/utils.h b/csrcs/fastdeploy/backends/ort/utils.h
deleted file mode 100644
index e2912ad38..000000000
--- a/csrcs/fastdeploy/backends/ort/utils.h
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <iostream>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "fastdeploy/backends/backend.h"
-#include "onnxruntime_cxx_api.h"  // NOLINT
-
-namespace fastdeploy {
-
-// Convert FDDataType to OrtDataType
-ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype);
-
-// Convert OrtDataType to FDDataType
-FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype);
-
-// Create Ort::Value
-// is_backend_cuda specify if the onnxruntime use CUDAExectionProvider
-// While is_backend_cuda = true, and tensor.device = Device::GPU
-// Will directly share the cuda data in tensor to OrtValue
-Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda = false);
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/paddle/paddle_backend.cc b/csrcs/fastdeploy/backends/paddle/paddle_backend.cc
deleted file mode 100644
index 2fae38937..000000000
--- a/csrcs/fastdeploy/backends/paddle/paddle_backend.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/backends/paddle/paddle_backend.h"
-
-namespace fastdeploy {
-
-void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
-  if (option.use_gpu) {
-    config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
-  } else {
-    config_.DisableGpu();
-    if (option.enable_mkldnn) {
-      config_.EnableMKLDNN();
-      config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
-    }
-  }
-  config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
-}
-
-bool PaddleBackend::InitFromPaddle(const std::string& model_file,
-                                   const std::string& params_file,
-                                   const PaddleBackendOption& option) {
-  if (initialized_) {
-    FDERROR << "PaddleBackend is already initlized, cannot initialize again."
-            << std::endl;
-    return false;
-  }
-  config_.SetModel(model_file, params_file);
-  BuildOption(option);
-  predictor_ = paddle_infer::CreatePredictor(config_);
-  std::vector<std::string> input_names = predictor_->GetInputNames();
-  std::vector<std::string> output_names = predictor_->GetOutputNames();
-  for (size_t i = 0; i < input_names.size(); ++i) {
-    auto handle = predictor_->GetInputHandle(input_names[i]);
-    TensorInfo info;
-    auto shape = handle->shape();
-    info.shape.assign(shape.begin(), shape.end());
-    info.dtype = PaddleDataTypeToFD(handle->type());
-    info.name = input_names[i];
-    inputs_desc_.emplace_back(info);
-  }
-  for (size_t i = 0; i < output_names.size(); ++i) {
-    auto handle = predictor_->GetOutputHandle(output_names[i]);
-    TensorInfo info;
-    auto shape = handle->shape();
-    info.shape.assign(shape.begin(), shape.end());
-    info.dtype = PaddleDataTypeToFD(handle->type());
-    info.name = output_names[i];
-    outputs_desc_.emplace_back(info);
-  }
-  initialized_ = true;
-  return true;
-}
-
-TensorInfo PaddleBackend::GetInputInfo(int index) {
-  FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
-                                    " should less than the number of inputs:" +
-                                    std::to_string(NumInputs()) + ".");
-  return inputs_desc_[index];
-}
-
-TensorInfo PaddleBackend::GetOutputInfo(int index) {
-  FDASSERT(index < NumOutputs(),
-           "The index:" + std::to_string(index) +
-               " should less than the number of outputs:" +
-               std::to_string(NumOutputs()) + ".");
-  return outputs_desc_[index];
-}
-
-bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
-                          std::vector<FDTensor>* outputs) {
-  if (inputs.size() != inputs_desc_.size()) {
-    FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size()
-            << ") should keep same with the inputs of this model("
-            << inputs_desc_.size() << ")." << std::endl;
-    return false;
-  }
-
-  for (size_t i = 0; i < inputs.size(); ++i) {
-    auto handle = predictor_->GetInputHandle(inputs[i].name);
-    ShareTensorFromCpu(handle.get(), inputs[i]);
-  }
-
-  predictor_->Run();
-  outputs->resize(outputs_desc_.size());
-  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
-    CopyTensorToCpu(handle, &((*outputs)[i]));
-  }
-  return true;
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/paddle/paddle_backend.h b/csrcs/fastdeploy/backends/paddle/paddle_backend.h
deleted file mode 100644
index 99ca5eb1b..000000000
--- a/csrcs/fastdeploy/backends/paddle/paddle_backend.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <iostream>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "fastdeploy/backends/backend.h"
-#include "paddle_inference_api.h"  // NOLINT
-
-namespace fastdeploy {
-
-struct PaddleBackendOption {
-#ifdef WITH_GPU
-  bool use_gpu = true;
-#else
-  bool use_gpu = false;
-#endif
-  bool enable_mkldnn = true;
-
-  int mkldnn_cache_size = 1;
-  int cpu_thread_num = 8;
-  // initialize memory size(MB) for GPU
-  int gpu_mem_init_size = 100;
-  // gpu device id
-  int gpu_id = 0;
-};
-
-// Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor
-void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
-
-// Copy memory data from paddle_infer::Tensor to fastdeploy::FDTensor
-void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
-                     FDTensor* fd_tensor);
-
-// Convert data type from paddle inference to fastdeploy
-FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype);
-
-class PaddleBackend : public BaseBackend {
- public:
-  PaddleBackend() {}
-  virtual ~PaddleBackend() = default;
-  void BuildOption(const PaddleBackendOption& option);
-
-  bool InitFromPaddle(
-      const std::string& model_file, const std::string& params_file,
-      const PaddleBackendOption& option = PaddleBackendOption());
-
-  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
-
-  int NumInputs() const { return inputs_desc_.size(); }
-
-  int NumOutputs() const { return outputs_desc_.size(); }
-
-  TensorInfo GetInputInfo(int index);
-  TensorInfo GetOutputInfo(int index);
-
- private:
-  paddle_infer::Config config_;
-  std::shared_ptr<paddle_infer::Predictor> predictor_;
-  std::vector<TensorInfo> inputs_desc_;
-  std::vector<TensorInfo> outputs_desc_;
-};
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/paddle/util.cc b/csrcs/fastdeploy/backends/paddle/util.cc
deleted file mode 100644
index 1ae5b3553..000000000
--- a/csrcs/fastdeploy/backends/paddle/util.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/backends/paddle/paddle_backend.h"
-
-namespace fastdeploy {
-void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor) {
-  std::vector<int> shape(fd_tensor.shape.begin(), fd_tensor.shape.end());
-  tensor->Reshape(shape);
-  if (fd_tensor.dtype == FDDataType::FP32) {
-    tensor->ShareExternalData(static_cast<const float*>(fd_tensor.Data()),
-                              shape, paddle_infer::PlaceType::kCPU);
-    return;
-  } else if (fd_tensor.dtype == FDDataType::INT32) {
-    tensor->ShareExternalData(static_cast<const int32_t*>(fd_tensor.Data()),
-                              shape, paddle_infer::PlaceType::kCPU);
-    return;
-  } else if (fd_tensor.dtype == FDDataType::INT64) {
-    tensor->ShareExternalData(static_cast<const int64_t*>(fd_tensor.Data()),
-                              shape, paddle_infer::PlaceType::kCPU);
-    return;
-  }
-  FDASSERT(false, "Unexpected data type(" + Str(fd_tensor.dtype) +
-                      ") while infer with PaddleBackend.");
-}
-
-void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
-                     FDTensor* fd_tensor) {
-  auto fd_dtype = PaddleDataTypeToFD(tensor->type());
-  std::vector<int64_t> shape;
-  auto tmp_shape = tensor->shape();
-  shape.assign(tmp_shape.begin(), tmp_shape.end());
-  fd_tensor->Allocate(shape, fd_dtype, tensor->name());
-  if (fd_tensor->dtype == FDDataType::FP32) {
-    tensor->CopyToCpu(static_cast<float*>(fd_tensor->MutableData()));
-    return;
-  } else if (fd_tensor->dtype == FDDataType::INT32) {
-    tensor->CopyToCpu(static_cast<int32_t*>(fd_tensor->MutableData()));
-    return;
-  } else if (fd_tensor->dtype == FDDataType::INT64) {
-    tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor->MutableData()));
-    return;
-  }
-  FDASSERT(false, "Unexpected data type(" + Str(fd_tensor->dtype) +
-                      ") while infer with PaddleBackend.");
-}
-
-FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype) {
-  auto fd_dtype = FDDataType::FP32;
-  if (dtype == paddle_infer::FLOAT32) {
-    fd_dtype = FDDataType::FP32;
-  } else if (dtype == paddle_infer::INT64) {
-    fd_dtype = FDDataType::INT64;
-  } else if (dtype == paddle_infer::INT32) {
-    fd_dtype = FDDataType::INT32;
-  } else if (dtype == paddle_infer::UINT8) {
-    fd_dtype = FDDataType::UINT8;
-  } else {
-    FDASSERT(false, "Unexpected data type:" + std::to_string(int(dtype)) +
-                        " while call CopyTensorToCpu in PaddleBackend.");
-  }
-  return fd_dtype;
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h b/csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h
deleted file mode 100644
index 2484ccc68..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef BATCH_STREAM_H
-#define BATCH_STREAM_H
-
-#include "NvInfer.h"
-#include "common.h"
-#include <algorithm>
-#include <stdio.h>
-#include <vector>
-
-class IBatchStream {
- public:
-  virtual void reset(int firstBatch) = 0;
-  virtual bool next() = 0;
-  virtual void skip(int skipCount) = 0;
-  virtual float* getBatch() = 0;
-  virtual float* getLabels() = 0;
-  virtual int getBatchesRead() const = 0;
-  virtual int getBatchSize() const = 0;
-  virtual nvinfer1::Dims getDims() const = 0;
-};
-
-class MNISTBatchStream : public IBatchStream {
- public:
-  MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
-                   const std::string& labelsFile,
-                   const std::vector<std::string>& directories)
-      : mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
-  //!< We already know the dimensions of MNIST images.
-  {
-    readDataFile(locateFile(dataFile, directories));
-    readLabelsFile(locateFile(labelsFile, directories));
-  }
-
-  void reset(int firstBatch) override { mBatchCount = firstBatch; }
-
-  bool next() override {
-    if (mBatchCount >= mMaxBatches) {
-      return false;
-    }
-    ++mBatchCount;
-    return true;
-  }
-
-  void skip(int skipCount) override { mBatchCount += skipCount; }
-
-  float* getBatch() override {
-    return mData.data() +
-           (mBatchCount * mBatchSize * samplesCommon::volume(mDims));
-  }
-
-  float* getLabels() override {
-    return mLabels.data() + (mBatchCount * mBatchSize);
-  }
-
-  int getBatchesRead() const override { return mBatchCount; }
-
-  int getBatchSize() const override { return mBatchSize; }
-
-  nvinfer1::Dims getDims() const override {
-    return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
-  }
-
- private:
-  void readDataFile(const std::string& dataFilePath) {
-    std::ifstream file{dataFilePath.c_str(), std::ios::binary};
-
-    int magicNumber, numImages, imageH, imageW;
-    file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
-    // All values in the MNIST files are big endian.
-    magicNumber = samplesCommon::swapEndianness(magicNumber);
-    ASSERT(magicNumber == 2051 &&
-           "Magic Number does not match the expected value for an MNIST image "
-           "set");
-
-    // Read number of images and dimensions
-    file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
-    file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
-    file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
-
-    numImages = samplesCommon::swapEndianness(numImages);
-    imageH = samplesCommon::swapEndianness(imageH);
-    imageW = samplesCommon::swapEndianness(imageW);
-
-    // The MNIST data is made up of unsigned bytes, so we need to cast to float
-    // and normalize.
-    int numElements = numImages * imageH * imageW;
-    std::vector<uint8_t> rawData(numElements);
-    file.read(reinterpret_cast<char*>(rawData.data()),
-              numElements * sizeof(uint8_t));
-    mData.resize(numElements);
-    std::transform(rawData.begin(), rawData.end(), mData.begin(),
-                   [](uint8_t val) { return static_cast<float>(val) / 255.f; });
-  }
-
-  void readLabelsFile(const std::string& labelsFilePath) {
-    std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
-    int magicNumber, numImages;
-    file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
-    // All values in the MNIST files are big endian.
-    magicNumber = samplesCommon::swapEndianness(magicNumber);
-    ASSERT(magicNumber == 2049 &&
-           "Magic Number does not match the expected value for an MNIST labels "
-           "file");
-
-    file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
-    numImages = samplesCommon::swapEndianness(numImages);
-
-    std::vector<uint8_t> rawLabels(numImages);
-    file.read(reinterpret_cast<char*>(rawLabels.data()),
-              numImages * sizeof(uint8_t));
-    mLabels.resize(numImages);
-    std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
-                   [](uint8_t val) { return static_cast<float>(val); });
-  }
-
-  int mBatchSize{0};
-  int mBatchCount{
-      0}; //!< The batch that will be read on the next invocation of next()
-  int mMaxBatches{0};
-  Dims mDims{};
-  std::vector<float> mData{};
-  std::vector<float> mLabels{};
-};
-
-class BatchStream : public IBatchStream {
- public:
-  BatchStream(int batchSize, int maxBatches, std::string prefix,
-              std::string suffix, std::vector<std::string> directories)
-      : mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
-        mSuffix(suffix), mDataDir(directories) {
-    FILE* file = fopen(
-        locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
-        "rb");
-    ASSERT(file != nullptr);
-    int d[4];
-    size_t readSize = fread(d, sizeof(int), 4, file);
-    ASSERT(readSize == 4);
-    mDims.nbDims = 4;  // The number of dimensions.
-    mDims.d[0] = d[0]; // Batch Size
-    mDims.d[1] = d[1]; // Channels
-    mDims.d[2] = d[2]; // Height
-    mDims.d[3] = d[3]; // Width
-    ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
-           mDims.d[3] > 0);
-    fclose(file);
-
-    mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
-    mBatch.resize(mBatchSize * mImageSize, 0);
-    mLabels.resize(mBatchSize, 0);
-    mFileBatch.resize(mDims.d[0] * mImageSize, 0);
-    mFileLabels.resize(mDims.d[0], 0);
-    reset(0);
-  }
-
-  BatchStream(int batchSize, int maxBatches, std::string prefix,
-              std::vector<std::string> directories)
-      : BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}
-
-  BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
-              std::string listFile, std::vector<std::string> directories)
-      : mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
-        mListFile(listFile), mDataDir(directories) {
-    mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
-    mBatch.resize(mBatchSize * mImageSize, 0);
-    mLabels.resize(mBatchSize, 0);
-    mFileBatch.resize(mDims.d[0] * mImageSize, 0);
-    mFileLabels.resize(mDims.d[0], 0);
-    reset(0);
-  }
-
-  // Resets data members
-  void reset(int firstBatch) override {
-    mBatchCount = 0;
-    mFileCount = 0;
-    mFileBatchPos = mDims.d[0];
-    skip(firstBatch);
-  }
-
-  // Advance to next batch and return true, or return false if there is no batch
-  // left.
-  bool next() override {
-    if (mBatchCount == mMaxBatches) {
-      return false;
-    }
-
-    for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
-         batchPos += csize, mFileBatchPos += csize) {
-      ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
-      if (mFileBatchPos == mDims.d[0] && !update()) {
-        return false;
-      }
-
-      // copy the smaller of: elements left to fulfill the request, or elements
-      // left in the file buffer.
-      csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
-      std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
-                  csize * mImageSize, getBatch() + batchPos * mImageSize);
-      std::copy_n(getFileLabels() + mFileBatchPos, csize,
-                  getLabels() + batchPos);
-    }
-    mBatchCount++;
-    return true;
-  }
-
-  // Skips the batches
-  void skip(int skipCount) override {
-    if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
-        mFileBatchPos == mDims.d[0]) {
-      mFileCount += skipCount * mBatchSize / mDims.d[0];
-      return;
-    }
-
-    int x = mBatchCount;
-    for (int i = 0; i < skipCount; i++) {
-      next();
-    }
-    mBatchCount = x;
-  }
-
-  float* getBatch() override { return mBatch.data(); }
-
-  float* getLabels() override { return mLabels.data(); }
-
-  int getBatchesRead() const override { return mBatchCount; }
-
-  int getBatchSize() const override { return mBatchSize; }
-
-  nvinfer1::Dims getDims() const override { return mDims; }
-
- private:
-  float* getFileBatch() { return mFileBatch.data(); }
-
-  float* getFileLabels() { return mFileLabels.data(); }
-
-  bool update() {
-    if (mListFile.empty()) {
-      std::string inputFileName = locateFile(
-          mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
-      FILE* file = fopen(inputFileName.c_str(), "rb");
-      if (!file) {
-        return false;
-      }
-
-      int d[4];
-      size_t readSize = fread(d, sizeof(int), 4, file);
-      ASSERT(readSize == 4);
-      ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
-             mDims.d[3] == d[3]);
-      size_t readInputCount =
-          fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
-      ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
-      size_t readLabelCount =
-          fread(getFileLabels(), sizeof(float), mDims.d[0], file);
-      ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));
-
-      fclose(file);
-    } else {
-      std::vector<std::string> fNames;
-      std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
-      if (!file) {
-        return false;
-      }
-
-      sample::gLogInfo << "Batch #" << mFileCount << std::endl;
-      file.seekg(((mBatchCount * mBatchSize)) * 7);
-
-      for (int i = 1; i <= mBatchSize; i++) {
-        std::string sName;
-        std::getline(file, sName);
-        sName = sName + ".ppm";
-        sample::gLogInfo << "Calibrating with file " << sName << std::endl;
-        fNames.emplace_back(sName);
-      }
-
-      mFileCount++;
-
-      const int imageC = 3;
-      const int imageH = 300;
-      const int imageW = 300;
-      std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
-          fNames.size());
-      for (uint32_t i = 0; i < fNames.size(); ++i) {
-        readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
-      }
-
-      std::vector<float> data(samplesCommon::volume(mDims));
-      const float scale = 2.0 / 255.0;
-      const float bias = 1.0;
-      long int volChl = mDims.d[2] * mDims.d[3];
-
-      // Normalize input data
-      for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
-           i < mBatchSize; ++i) {
-        for (int c = 0; c < mDims.d[1]; ++c) {
-          for (int j = 0; j < volChl; ++j) {
-            data[i * volImg + c * volChl + j] =
-                scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
-          }
-        }
-      }
-
-      std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
-    }
-
-    mFileBatchPos = 0;
-    return true;
-  }
-
-  int mBatchSize{0};
-  int mMaxBatches{0};
-  int mBatchCount{0};
-  int mFileCount{0};
-  int mFileBatchPos{0};
-  int mImageSize{0};
-  std::vector<float> mBatch;      //!< Data for the batch
-  std::vector<float> mLabels;     //!< Labels for the batch
-  std::vector<float> mFileBatch;  //!< List of image files
-  std::vector<float> mFileLabels; //!< List of label files
-  std::string mPrefix;            //!< Batch file name prefix
-  std::string mSuffix;            //!< Batch file name suffix
-  nvinfer1::Dims mDims;           //!< Input dimensions
-  std::string mListFile;          //!< File name of the list of image names
-  std::vector<std::string>
-      mDataDir; //!< Directories where the files can be found
-};
-
-#endif
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg b/csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg
deleted file mode 100644
index 51ff339c1..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg
+++ /dev/null
@@ -1 +0,0 @@
-exclude_files=.*
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h b/csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h
deleted file mode 100644
index 40eb8f13e..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ENTROPY_CALIBRATOR_H
-#define ENTROPY_CALIBRATOR_H
-
-#include "BatchStream.h"
-#include "NvInfer.h"
-
-//! \class EntropyCalibratorImpl
-//!
-//! \brief Implements common functionality for Entropy calibrators.
-//!
-template <typename TBatchStream> class EntropyCalibratorImpl {
- public:
-  EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
-                        std::string networkName, const char* inputBlobName,
-                        bool readCache = true)
-      : mStream{stream},
-        mCalibrationTableName("CalibrationTable" + networkName),
-        mInputBlobName(inputBlobName), mReadCache(readCache) {
-    nvinfer1::Dims dims = mStream.getDims();
-    mInputCount = samplesCommon::volume(dims);
-    CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
-    mStream.reset(firstBatch);
-  }
-
-  virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }
-
-  int getBatchSize() const noexcept { return mStream.getBatchSize(); }
-
-  bool getBatch(void* bindings[], const char* names[],
-                int nbBindings) noexcept {
-    if (!mStream.next()) {
-      return false;
-    }
-    CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
-                     mInputCount * sizeof(float), cudaMemcpyHostToDevice));
-    ASSERT(!strcmp(names[0], mInputBlobName));
-    bindings[0] = mDeviceInput;
-    return true;
-  }
-
-  const void* readCalibrationCache(size_t& length) noexcept {
-    mCalibrationCache.clear();
-    std::ifstream input(mCalibrationTableName, std::ios::binary);
-    input >> std::noskipws;
-    if (mReadCache && input.good()) {
-      std::copy(std::istream_iterator<char>(input),
-                std::istream_iterator<char>(),
-                std::back_inserter(mCalibrationCache));
-    }
-    length = mCalibrationCache.size();
-    return length ? mCalibrationCache.data() : nullptr;
-  }
-
-  void writeCalibrationCache(const void* cache, size_t length) noexcept {
-    std::ofstream output(mCalibrationTableName, std::ios::binary);
-    output.write(reinterpret_cast<const char*>(cache), length);
-  }
-
- private:
-  TBatchStream mStream;
-  size_t mInputCount;
-  std::string mCalibrationTableName;
-  const char* mInputBlobName;
-  bool mReadCache{true};
-  void* mDeviceInput{nullptr};
-  std::vector<char> mCalibrationCache;
-};
-
-//! \class Int8EntropyCalibrator2
-//!
-//! \brief Implements Entropy calibrator 2.
-//!  CalibrationAlgoType is kENTROPY_CALIBRATION_2.
-//!
-template <typename TBatchStream>
-class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 {
- public:
-  Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
-                         const char* networkName, const char* inputBlobName,
-                         bool readCache = true)
-      : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}
-
-  int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }
-
-  bool getBatch(void* bindings[], const char* names[],
-                int nbBindings) noexcept override {
-    return mImpl.getBatch(bindings, names, nbBindings);
-  }
-
-  const void* readCalibrationCache(size_t& length) noexcept override {
-    return mImpl.readCalibrationCache(length);
-  }
-
-  void writeCalibrationCache(const void* cache,
-                             size_t length) noexcept override {
-    mImpl.writeCalibrationCache(cache, length);
-  }
-
- private:
-  EntropyCalibratorImpl<TBatchStream> mImpl;
-};
-
-#endif // ENTROPY_CALIBRATOR_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h b/csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h
deleted file mode 100644
index e13b55bd9..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ERROR_RECORDER_H
-#define ERROR_RECORDER_H
-#include "NvInferRuntimeCommon.h"
-#include "logger.h"
-#include <atomic>
-#include <cstdint>
-#include <exception>
-#include <mutex>
-#include <vector>
-
-using nvinfer1::ErrorCode;
-using nvinfer1::IErrorRecorder;
-
-//!
-//! A simple implementation of the IErrorRecorder interface for
-//! use by samples. This interface also can be used as a reference
-//! implementation.
-//! The sample Error recorder is based on a vector that pairs the error
-//! code and the error string into a single element. It also uses
-//! standard mutex's and atomics in order to make sure that the code
-//! works in a multi-threaded environment.
-//!
-class SampleErrorRecorder : public IErrorRecorder {
-  using errorPair = std::pair<ErrorCode, std::string>;
-  using errorStack = std::vector<errorPair>;
-
- public:
-  SampleErrorRecorder() = default;
-
-  virtual ~SampleErrorRecorder() noexcept {}
-  int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
-  ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
-    return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
-                                       : (*this)[errorIdx].first;
-  };
-  IErrorRecorder::ErrorDesc
-  getErrorDesc(int32_t errorIdx) const noexcept final {
-    return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
-                                       : (*this)[errorIdx].second.c_str();
-  }
-  // This class can never overflow since we have dynamic resize via std::vector
-  // usage.
-  bool hasOverflowed() const noexcept final { return false; }
-
-  // Empty the errorStack.
-  void clear() noexcept final {
-    try {
-      // grab a lock so that there is no addition while clearing.
-      std::lock_guard<std::mutex> guard(mStackLock);
-      mErrorStack.clear();
-    } catch (const std::exception& e) {
-      sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
-    }
-  };
-
-  //! Simple helper function that
-  bool empty() const noexcept { return mErrorStack.empty(); }
-
-  bool reportError(ErrorCode val,
-                   IErrorRecorder::ErrorDesc desc) noexcept final {
-    try {
-      std::lock_guard<std::mutex> guard(mStackLock);
-      sample::gLogError << "Error[" << static_cast<int32_t>(val)
-                        << "]: " << desc << std::endl;
-      mErrorStack.push_back(errorPair(val, desc));
-    } catch (const std::exception& e) {
-      sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
-    }
-    // All errors are considered fatal.
-    return true;
-  }
-
-  // Atomically increment or decrement the ref counter.
-  IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
-  IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }
-
- private:
-  // Simple helper functions.
-  const errorPair& operator[](size_t index) const noexcept {
-    return mErrorStack[index];
-  }
-
-  bool invalidIndexCheck(int32_t index) const noexcept {
-    // By converting signed to unsigned, we only need a single check since
-    // negative numbers turn into large positive greater than the size.
-    size_t sIndex = index;
-    return sIndex >= mErrorStack.size();
-  }
-  // Mutex to hold when locking mErrorStack.
-  std::mutex mStackLock;
-
-  // Reference count of the class. Destruction of the class when mRefCount
-  // is not zero causes undefined behavior.
-  std::atomic<int32_t> mRefCount{0};
-
-  // The error stack that holds the errors recorded by TensorRT.
-  errorStack mErrorStack;
-};     // class SampleErrorRecorder
-#endif // ERROR_RECORDER_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/README.md b/csrcs/fastdeploy/backends/tensorrt/common/README.md
deleted file mode 100644
index 0ed86b17a..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/README.md
+++ /dev/null
@@ -1 +0,0 @@
-目录代码来源自 https://github.com/NVIDIA/TensorRT
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/argsParser.h b/csrcs/fastdeploy/backends/tensorrt/common/argsParser.h
deleted file mode 100644
index e2e1b1e95..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/argsParser.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef TENSORRT_ARGS_PARSER_H
-#define TENSORRT_ARGS_PARSER_H
-
-#include <string>
-#include <vector>
-#ifdef _MSC_VER
-#include ".\windows\getopt.h"
-#else
-#include <getopt.h>
-#endif
-#include <iostream>
-
-namespace samplesCommon {
-
-//!
-//! \brief The SampleParams structure groups the basic parameters required by
-//!        all sample networks.
-//!
-struct SampleParams {
-  int32_t batchSize{1}; //!< Number of inputs in a batch
-  int32_t dlaCore{-1};  //!< Specify the DLA core to run network on.
-  bool int8{false};     //!< Allow runnning the network in Int8 mode.
-  bool fp16{false};     //!< Allow running the network in FP16 mode.
-  std::vector<std::string>
-      dataDirs; //!< Directory paths where sample data files are stored
-  std::vector<std::string> inputTensorNames;
-  std::vector<std::string> outputTensorNames;
-};
-
-//!
-//! \brief The CaffeSampleParams structure groups the additional parameters
-//! required by
-//!         networks that use caffe
-//!
-struct CaffeSampleParams : public SampleParams {
-  std::string
-      prototxtFileName; //!< Filename of prototxt design file of a network
-  std::string
-      weightsFileName;      //!< Filename of trained weights file of a network
-  std::string meanFileName; //!< Filename of mean file of a network
-};
-
-//!
-//! \brief The OnnxSampleParams structure groups the additional parameters
-//! required by
-//!         networks that use ONNX
-//!
-struct OnnxSampleParams : public SampleParams {
-  std::string onnxFileName; //!< Filename of ONNX file of a network
-};
-
-//!
-//! \brief The UffSampleParams structure groups the additional parameters
-//! required by
-//!         networks that use Uff
-//!
-struct UffSampleParams : public SampleParams {
-  std::string uffFileName; //!< Filename of uff file of a network
-};
-
-//!
-//! /brief Struct to maintain command-line arguments.
-//!
-struct Args {
-  bool runInInt8{false};
-  bool runInFp16{false};
-  bool help{false};
-  int32_t useDLACore{-1};
-  int32_t batch{1};
-  std::vector<std::string> dataDirs;
-  std::string saveEngine;
-  std::string loadEngine;
-  bool useILoop{false};
-};
-
-//!
-//! \brief Populates the Args struct with the provided command-line parameters.
-//!
-//! \throw invalid_argument if any of the arguments are not valid
-//!
-//! \return boolean If return value is true, execution can continue, otherwise
-//! program should exit
-//!
-inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
-  while (1) {
-    int32_t arg;
-    static struct option long_options[] = {
-        {"help", no_argument, 0, 'h'},
-        {"datadir", required_argument, 0, 'd'},
-        {"int8", no_argument, 0, 'i'},
-        {"fp16", no_argument, 0, 'f'},
-        {"useILoop", no_argument, 0, 'l'},
-        {"saveEngine", required_argument, 0, 's'},
-        {"loadEngine", no_argument, 0, 'o'},
-        {"useDLACore", required_argument, 0, 'u'},
-        {"batch", required_argument, 0, 'b'},
-        {nullptr, 0, nullptr, 0}};
-    int32_t option_index = 0;
-    arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
-    if (arg == -1) {
-      break;
-    }
-
-    switch (arg) {
-    case 'h':
-      args.help = true;
-      return true;
-    case 'd':
-      if (optarg) {
-        args.dataDirs.push_back(optarg);
-      } else {
-        std::cerr << "ERROR: --datadir requires option argument" << std::endl;
-        return false;
-      }
-      break;
-    case 's':
-      if (optarg) {
-        args.saveEngine = optarg;
-      }
-      break;
-    case 'o':
-      if (optarg) {
-        args.loadEngine = optarg;
-      }
-      break;
-    case 'i':
-      args.runInInt8 = true;
-      break;
-    case 'f':
-      args.runInFp16 = true;
-      break;
-    case 'l':
-      args.useILoop = true;
-      break;
-    case 'u':
-      if (optarg) {
-        args.useDLACore = std::stoi(optarg);
-      }
-      break;
-    case 'b':
-      if (optarg) {
-        args.batch = std::stoi(optarg);
-      }
-      break;
-    default:
-      return false;
-    }
-  }
-  return true;
-}
-
-} // namespace samplesCommon
-
-#endif // TENSORRT_ARGS_PARSER_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/buffers.h b/csrcs/fastdeploy/backends/tensorrt/common/buffers.h
deleted file mode 100644
index 8061ee33d..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/buffers.h
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef TENSORRT_BUFFERS_H
-#define TENSORRT_BUFFERS_H
-
-#include "NvInfer.h"
-#include "common.h"
-#include "half.h"
-#include <cassert>
-#include <cuda_runtime_api.h>
-#include <iostream>
-#include <iterator>
-#include <memory>
-#include <new>
-#include <numeric>
-#include <string>
-#include <vector>
-
-namespace samplesCommon {
-
-//!
-//! \brief  The GenericBuffer class is a templated class for buffers.
-//!
-//! \details This templated RAII (Resource Acquisition Is Initialization) class
-//! handles the allocation,
-//!          deallocation, querying of buffers on both the device and the host.
-//!          It can handle data of arbitrary types because it stores byte
-//!          buffers.
-//!          The template parameters AllocFunc and FreeFunc are used for the
-//!          allocation and deallocation of the buffer.
-//!          AllocFunc must be a functor that takes in (void** ptr, size_t size)
-//!          and returns bool. ptr is a pointer to where the allocated buffer
-//!          address should be stored.
-//!          size is the amount of memory in bytes to allocate.
-//!          The boolean indicates whether or not the memory allocation was
-//!          successful.
-//!          FreeFunc must be a functor that takes in (void* ptr) and returns
-//!          void.
-//!          ptr is the allocated buffer address. It must work with nullptr
-//!          input.
-//!
-template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
- public:
-  //!
-  //! \brief Construct an empty buffer.
-  //!
-  GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
-      : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
-
-  //!
-  //! \brief Construct a buffer with the specified allocation size in bytes.
-  //!
-  GenericBuffer(size_t size, nvinfer1::DataType type)
-      : mSize(size), mCapacity(size), mType(type) {
-    if (!allocFn(&mBuffer, this->nbBytes())) {
-      throw std::bad_alloc();
-    }
-  }
-
-  GenericBuffer(GenericBuffer&& buf)
-      : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
-        mBuffer(buf.mBuffer) {
-    buf.mSize = 0;
-    buf.mCapacity = 0;
-    buf.mType = nvinfer1::DataType::kFLOAT;
-    buf.mBuffer = nullptr;
-  }
-
-  GenericBuffer& operator=(GenericBuffer&& buf) {
-    if (this != &buf) {
-      freeFn(mBuffer);
-      mSize = buf.mSize;
-      mCapacity = buf.mCapacity;
-      mType = buf.mType;
-      mBuffer = buf.mBuffer;
-      // Reset buf.
-      buf.mSize = 0;
-      buf.mCapacity = 0;
-      buf.mBuffer = nullptr;
-    }
-    return *this;
-  }
-
-  //!
-  //! \brief Returns pointer to underlying array.
-  //!
-  void* data() { return mBuffer; }
-
-  //!
-  //! \brief Returns pointer to underlying array.
-  //!
-  const void* data() const { return mBuffer; }
-
-  //!
-  //! \brief Returns the size (in number of elements) of the buffer.
-  //!
-  size_t size() const { return mSize; }
-
-  //!
-  //! \brief Returns the size (in bytes) of the buffer.
-  //!
-  size_t nbBytes() const {
-    return this->size() * samplesCommon::getElementSize(mType);
-  }
-
-  //!
-  //! \brief Resizes the buffer. This is a no-op if the new size is smaller than
-  //! or equal to the current capacity.
-  //!
-  void resize(size_t newSize) {
-    mSize = newSize;
-    if (mCapacity < newSize) {
-      freeFn(mBuffer);
-      if (!allocFn(&mBuffer, this->nbBytes())) {
-        throw std::bad_alloc{};
-      }
-      mCapacity = newSize;
-    }
-  }
-
-  //!
-  //! \brief Overload of resize that accepts Dims
-  //!
-  void resize(const nvinfer1::Dims& dims) {
-    return this->resize(samplesCommon::volume(dims));
-  }
-
-  ~GenericBuffer() { freeFn(mBuffer); }
-
- private:
-  size_t mSize{0}, mCapacity{0};
-  nvinfer1::DataType mType;
-  void* mBuffer;
-  AllocFunc allocFn;
-  FreeFunc freeFn;
-};
-
-class DeviceAllocator {
- public:
-  bool operator()(void** ptr, size_t size) const {
-    return cudaMalloc(ptr, size) == cudaSuccess;
-  }
-};
-
-class DeviceFree {
- public:
-  void operator()(void* ptr) const { cudaFree(ptr); }
-};
-
-class HostAllocator {
- public:
-  bool operator()(void** ptr, size_t size) const {
-    *ptr = malloc(size);
-    return *ptr != nullptr;
-  }
-};
-
-class HostFree {
- public:
-  void operator()(void* ptr) const { free(ptr); }
-};
-
-using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
-using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
-
-//!
-//! \brief  The ManagedBuffer class groups together a pair of corresponding
-//! device and host buffers.
-//!
-class ManagedBuffer {
- public:
-  DeviceBuffer deviceBuffer;
-  HostBuffer hostBuffer;
-};
-
-//!
-//! \brief  The BufferManager class handles host and device buffer allocation
-//! and deallocation.
-//!
-//! \details This RAII class handles host and device buffer allocation and
-//! deallocation,
-//!          memcpy between host and device buffers to aid with inference,
-//!          and debugging dumps to validate inference. The BufferManager class
-//!          is meant to be
-//!          used to simplify buffer management and any interactions between
-//!          buffers and the engine.
-//!
-class BufferManager {
- public:
-  static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
-
-  //!
-  //! \brief Create a BufferManager for handling buffer interactions with
-  //! engine.
-  //!
-  BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
-                const int batchSize = 0,
-                const nvinfer1::IExecutionContext* context = nullptr)
-      : mEngine(engine), mBatchSize(batchSize) {
-    // Full Dims implies no batch size.
-    assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
-    // Create host and device buffers
-    for (int i = 0; i < mEngine->getNbBindings(); i++) {
-      auto dims = context ? context->getBindingDimensions(i)
-                          : mEngine->getBindingDimensions(i);
-      size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
-      nvinfer1::DataType type = mEngine->getBindingDataType(i);
-      int vecDim = mEngine->getBindingVectorizedDim(i);
-      if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
-      {
-        int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
-        dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
-        vol *= scalarsPerVec;
-      }
-      vol *= samplesCommon::volume(dims);
-      std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
-      manBuf->deviceBuffer = DeviceBuffer(vol, type);
-      manBuf->hostBuffer = HostBuffer(vol, type);
-      mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
-      mManagedBuffers.emplace_back(std::move(manBuf));
-    }
-  }
-
-  //!
-  //! \brief Returns a vector of device buffers that you can use directly as
-  //!        bindings for the execute and enqueue methods of IExecutionContext.
-  //!
-  std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }
-
-  //!
-  //! \brief Returns a vector of device buffers.
-  //!
-  const std::vector<void*>& getDeviceBindings() const {
-    return mDeviceBindings;
-  }
-
-  //!
-  //! \brief Returns the device buffer corresponding to tensorName.
-  //!        Returns nullptr if no such tensor can be found.
-  //!
-  void* getDeviceBuffer(const std::string& tensorName) const {
-    return getBuffer(false, tensorName);
-  }
-
-  //!
-  //! \brief Returns the host buffer corresponding to tensorName.
-  //!        Returns nullptr if no such tensor can be found.
-  //!
-  void* getHostBuffer(const std::string& tensorName) const {
-    return getBuffer(true, tensorName);
-  }
-
-  //!
-  //! \brief Returns the size of the host and device buffers that correspond to
-  //! tensorName.
-  //!        Returns kINVALID_SIZE_VALUE if no such tensor can be found.
-  //!
-  size_t size(const std::string& tensorName) const {
-    int index = mEngine->getBindingIndex(tensorName.c_str());
-    if (index == -1)
-      return kINVALID_SIZE_VALUE;
-    return mManagedBuffers[index]->hostBuffer.nbBytes();
-  }
-
-  //!
-  //! \brief Dump host buffer with specified tensorName to ostream.
-  //!        Prints error message to std::ostream if no such tensor can be
-  //!        found.
-  //!
-  void dumpBuffer(std::ostream& os, const std::string& tensorName) {
-    int index = mEngine->getBindingIndex(tensorName.c_str());
-    if (index == -1) {
-      os << "Invalid tensor name" << std::endl;
-      return;
-    }
-    void* buf = mManagedBuffers[index]->hostBuffer.data();
-    size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
-    nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
-    size_t rowCount = static_cast<size_t>(
-        bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
-    int leadDim = mBatchSize;
-    int* trailDims = bufDims.d;
-    int nbDims = bufDims.nbDims;
-
-    // Fix explicit Dimension networks
-    if (!leadDim && nbDims > 0) {
-      leadDim = bufDims.d[0];
-      ++trailDims;
-      --nbDims;
-    }
-
-    os << "[" << leadDim;
-    for (int i = 0; i < nbDims; i++)
-      os << ", " << trailDims[i];
-    os << "]" << std::endl;
-    switch (mEngine->getBindingDataType(index)) {
-    case nvinfer1::DataType::kINT32:
-      print<int32_t>(os, buf, bufSize, rowCount);
-      break;
-    case nvinfer1::DataType::kFLOAT:
-      print<float>(os, buf, bufSize, rowCount);
-      break;
-    case nvinfer1::DataType::kHALF:
-      print<half_float::half>(os, buf, bufSize, rowCount);
-      break;
-    case nvinfer1::DataType::kINT8:
-      assert(0 && "Int8 network-level input and output is not supported");
-      break;
-    case nvinfer1::DataType::kBOOL:
-      assert(0 && "Bool network-level input and output are not supported");
-      break;
-    }
-  }
-
-  //!
-  //! \brief Templated print function that dumps buffers of arbitrary type to
-  //! std::ostream.
-  //!        rowCount parameter controls how many elements are on each line.
-  //!        A rowCount of 1 means that there is only 1 element on each line.
-  //!
-  template <typename T>
-  void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
-    assert(rowCount != 0);
-    assert(bufSize % sizeof(T) == 0);
-    T* typedBuf = static_cast<T*>(buf);
-    size_t numItems = bufSize / sizeof(T);
-    for (int i = 0; i < static_cast<int>(numItems); i++) {
-      // Handle rowCount == 1 case
-      if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
-        os << typedBuf[i] << std::endl;
-      else if (rowCount == 1)
-        os << typedBuf[i];
-      // Handle rowCount > 1 case
-      else if (i % rowCount == 0)
-        os << typedBuf[i];
-      else if (i % rowCount == rowCount - 1)
-        os << " " << typedBuf[i] << std::endl;
-      else
-        os << " " << typedBuf[i];
-    }
-  }
-
-  //!
-  //! \brief Copy the contents of input host buffers to input device buffers
-  //! synchronously.
-  //!
-  void copyInputToDevice() { memcpyBuffers(true, false, false); }
-
-  //!
-  //! \brief Copy the contents of output device buffers to output host buffers
-  //! synchronously.
-  //!
-  void copyOutputToHost() { memcpyBuffers(false, true, false); }
-
-  //!
-  //! \brief Copy the contents of input host buffers to input device buffers
-  //! asynchronously.
-  //!
-  void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
-    memcpyBuffers(true, false, true, stream);
-  }
-
-  //!
-  //! \brief Copy the contents of output device buffers to output host buffers
-  //! asynchronously.
-  //!
-  void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
-    memcpyBuffers(false, true, true, stream);
-  }
-
-  ~BufferManager() = default;
-
- private:
-  void* getBuffer(const bool isHost, const std::string& tensorName) const {
-    int index = mEngine->getBindingIndex(tensorName.c_str());
-    if (index == -1)
-      return nullptr;
-    return (isHost ? mManagedBuffers[index]->hostBuffer.data()
-                   : mManagedBuffers[index]->deviceBuffer.data());
-  }
-
-  void memcpyBuffers(const bool copyInput, const bool deviceToHost,
-                     const bool async, const cudaStream_t& stream = 0) {
-    for (int i = 0; i < mEngine->getNbBindings(); i++) {
-      void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
-                                  : mManagedBuffers[i]->deviceBuffer.data();
-      const void* srcPtr = deviceToHost
-                               ? mManagedBuffers[i]->deviceBuffer.data()
-                               : mManagedBuffers[i]->hostBuffer.data();
-      const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
-      const cudaMemcpyKind memcpyType =
-          deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
-      if ((copyInput && mEngine->bindingIsInput(i)) ||
-          (!copyInput && !mEngine->bindingIsInput(i))) {
-        if (async)
-          CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
-        else
-          CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
-      }
-    }
-  }
-
-  std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
-  int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
-  std::vector<std::unique_ptr<ManagedBuffer>>
-      mManagedBuffers; //!< The vector of pointers to managed buffers
-  std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed
-                                      //! for engine execution
-};
-
-} // namespace samplesCommon
-
-#endif // TENSORRT_BUFFERS_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/common.h b/csrcs/fastdeploy/backends/tensorrt/common/common.h
deleted file mode 100644
index ad3af72a2..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/common.h
+++ /dev/null
@@ -1,844 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TENSORRT_COMMON_H
-#define TENSORRT_COMMON_H
-
-// For loadLibrary
-#ifdef _MSC_VER
-// Needed so that the max/min definitions in windows.h do not conflict with
-// std::max/min.
-#define NOMINMAX
-#include <windows.h>
-#undef NOMINMAX
-#else
-#include <dlfcn.h>
-#endif
-
-#include "NvInfer.h"
-#include "NvInferPlugin.h"
-#include "logger.h"
-#include <algorithm>
-#include <cassert>
-#include <chrono>
-#include <cmath>
-#include <cstring>
-#include <cuda_runtime_api.h>
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <memory>
-#include <new>
-#include <numeric>
-#include <ratio>
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "safeCommon.h"
-
-using namespace nvinfer1;
-using namespace plugin;
-
-#ifdef _MSC_VER
-#define FN_NAME __FUNCTION__
-#else
-#define FN_NAME __func__
-#endif
-
-#if defined(__aarch64__) || defined(__QNX__)
-#define ENABLE_DLA_API 1
-#endif
-
-#define CHECK_RETURN_W_MSG(status, val, errMsg)                                \
-  do {                                                                         \
-    if (!(status)) {                                                           \
-      sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
-                        << FN_NAME << "(), line " << __LINE__ << std::endl;    \
-      return val;                                                              \
-    }                                                                          \
-  } while (0)
-
-#undef ASSERT
-#define ASSERT(condition)                                                      \
-  do {                                                                         \
-    if (!(condition)) {                                                        \
-      sample::gLogError << "Assertion failure: " << #condition << std::endl;   \
-      abort();                                                                 \
-    }                                                                          \
-  } while (0)
-
-#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
-
-#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A * t)>
-
-template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
-  CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
-  auto deleter = [](T* t) { t->destroy(); };
-  return std::unique_ptr<T, decltype(deleter)>{static_cast<T*>(t), deleter};
-}
-
-constexpr long double operator"" _GiB(long double val) {
-  return val * (1 << 30);
-}
-constexpr long double operator"" _MiB(long double val) {
-  return val * (1 << 20);
-}
-constexpr long double operator"" _KiB(long double val) {
-  return val * (1 << 10);
-}
-
-// These is necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
-// Since the return type is signed, -1_GiB will work as expected.
-constexpr long long int operator"" _GiB(unsigned long long val) {
-  return val * (1 << 30);
-}
-constexpr long long int operator"" _MiB(unsigned long long val) {
-  return val * (1 << 20);
-}
-constexpr long long int operator"" _KiB(unsigned long long val) {
-  return val * (1 << 10);
-}
-
-struct SimpleProfiler : public nvinfer1::IProfiler {
-  struct Record {
-    float time{0};
-    int count{0};
-  };
-
-  virtual void reportLayerTime(const char* layerName, float ms) noexcept {
-    mProfile[layerName].count++;
-    mProfile[layerName].time += ms;
-    if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) ==
-        mLayerNames.end()) {
-      mLayerNames.push_back(layerName);
-    }
-  }
-
-  SimpleProfiler(const char* name,
-                 const std::vector<SimpleProfiler>& srcProfilers =
-                     std::vector<SimpleProfiler>())
-      : mName(name) {
-    for (const auto& srcProfiler : srcProfilers) {
-      for (const auto& rec : srcProfiler.mProfile) {
-        auto it = mProfile.find(rec.first);
-        if (it == mProfile.end()) {
-          mProfile.insert(rec);
-        } else {
-          it->second.time += rec.second.time;
-          it->second.count += rec.second.count;
-        }
-      }
-    }
-  }
-
-  friend std::ostream& operator<<(std::ostream& out,
-                                  const SimpleProfiler& value) {
-    out << "========== " << value.mName << " profile ==========" << std::endl;
-    float totalTime = 0;
-    std::string layerNameStr = "TensorRT layer name";
-    int maxLayerNameLength =
-        std::max(static_cast<int>(layerNameStr.size()), 70);
-    for (const auto& elem : value.mProfile) {
-      totalTime += elem.second.time;
-      maxLayerNameLength =
-          std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
-    }
-
-    auto old_settings = out.flags();
-    auto old_precision = out.precision();
-    // Output header
-    {
-      out << std::setw(maxLayerNameLength) << layerNameStr << " ";
-      out << std::setw(12) << "Runtime, "
-          << "%"
-          << " ";
-      out << std::setw(12) << "Invocations"
-          << " ";
-      out << std::setw(12) << "Runtime, ms" << std::endl;
-    }
-    for (size_t i = 0; i < value.mLayerNames.size(); i++) {
-      const std::string layerName = value.mLayerNames[i];
-      auto elem = value.mProfile.at(layerName);
-      out << std::setw(maxLayerNameLength) << layerName << " ";
-      out << std::setw(12) << std::fixed << std::setprecision(1)
-          << (elem.time * 100.0F / totalTime) << "%"
-          << " ";
-      out << std::setw(12) << elem.count << " ";
-      out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time
-          << std::endl;
-    }
-    out.flags(old_settings);
-    out.precision(old_precision);
-    out << "========== " << value.mName << " total runtime = " << totalTime
-        << " ms ==========" << std::endl;
-
-    return out;
-  }
-
- private:
-  std::string mName;
-  std::vector<std::string> mLayerNames;
-  std::map<std::string, Record> mProfile;
-};
-
-//! Locate path to file, given its filename or filepath suffix and possible dirs
-//! it might lie in.
-//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a
-//! file path.
-inline std::string locateFile(const std::string& filepathSuffix,
-                              const std::vector<std::string>& directories,
-                              bool reportError = true) {
-  const int MAX_DEPTH{10};
-  bool found{false};
-  std::string filepath;
-
-  for (auto& dir : directories) {
-    if (!dir.empty() && dir.back() != '/') {
-#ifdef _MSC_VER
-      filepath = dir + "\\" + filepathSuffix;
-#else
-      filepath = dir + "/" + filepathSuffix;
-#endif
-    } else {
-      filepath = dir + filepathSuffix;
-    }
-
-    for (int i = 0; i < MAX_DEPTH && !found; i++) {
-      const std::ifstream checkFile(filepath);
-      found = checkFile.is_open();
-      if (found) {
-        break;
-      }
-
-      filepath = "../" + filepath; // Try again in parent dir
-    }
-
-    if (found) {
-      break;
-    }
-
-    filepath.clear();
-  }
-
-  // Could not find the file
-  if (filepath.empty()) {
-    const std::string dirList = std::accumulate(
-        directories.begin() + 1, directories.end(), directories.front(),
-        [](const std::string& a, const std::string& b) {
-          return a + "\n\t" + b;
-        });
-    std::cout << "Could not find " << filepathSuffix
-              << " in data directories:\n\t" << dirList << std::endl;
-
-    if (reportError) {
-      std::cout << "&&&& FAILED" << std::endl;
-      exit(EXIT_FAILURE);
-    }
-  }
-
-  return filepath;
-}
-
-inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH,
-                        int inW) {
-  std::ifstream infile(fileName, std::ifstream::binary);
-  assert(infile.is_open() &&
-         "Attempting to read from a file that is not open.");
-  std::string magic, h, w, max;
-  infile >> magic >> h >> w >> max;
-  infile.seekg(1, infile.cur);
-  infile.read(reinterpret_cast<char*>(buffer), inH * inW);
-}
-
-namespace samplesCommon {
-
-// Swaps endianness of an integral type.
-template <typename T,
-          typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-inline T swapEndianness(const T& value) {
-  uint8_t bytes[sizeof(T)];
-  for (int i = 0; i < static_cast<int>(sizeof(T)); ++i) {
-    bytes[sizeof(T) - 1 - i] = *(reinterpret_cast<const uint8_t*>(&value) + i);
-  }
-  return *reinterpret_cast<T*>(bytes);
-}
-
-class HostMemory {
- public:
-  HostMemory() = delete;
-  virtual void* data() const noexcept { return mData; }
-  virtual std::size_t size() const noexcept { return mSize; }
-  virtual DataType type() const noexcept { return mType; }
-  virtual ~HostMemory() {}
-
- protected:
-  HostMemory(std::size_t size, DataType type)
-      : mData{nullptr}, mSize(size), mType(type) {}
-  void* mData;
-  std::size_t mSize;
-  DataType mType;
-};
-
-template <typename ElemType, DataType dataType>
-class TypedHostMemory : public HostMemory {
- public:
-  explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) {
-    mData = new ElemType[size];
-  };
-  ~TypedHostMemory() noexcept { delete[](ElemType*) mData; }
-  ElemType* raw() noexcept { return static_cast<ElemType*>(data()); }
-};
-
-using FloatMemory = TypedHostMemory<float, DataType::kFLOAT>;
-using HalfMemory = TypedHostMemory<uint16_t, DataType::kHALF>;
-using ByteMemory = TypedHostMemory<uint8_t, DataType::kINT8>;
-
-inline void* safeCudaMalloc(size_t memSize) {
-  void* deviceMem;
-  CHECK(cudaMalloc(&deviceMem, memSize));
-  if (deviceMem == nullptr) {
-    std::cerr << "Out of memory" << std::endl;
-    exit(1);
-  }
-  return deviceMem;
-}
-
-inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); }
-
-struct InferDeleter {
-  template <typename T> void operator()(T* obj) const { delete obj; }
-};
-
-template <typename T> using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;
-
-static auto StreamDeleter = [](cudaStream_t* pStream) {
-  if (pStream) {
-    cudaStreamDestroy(*pStream);
-    delete pStream;
-  }
-};
-
-inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream() {
-  std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(
-      new cudaStream_t, StreamDeleter);
-  if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) !=
-      cudaSuccess) {
-    pStream.reset(nullptr);
-  }
-
-  return pStream;
-}
-
-//! Return vector of indices that puts magnitudes of sequence in descending
-//! order.
-template <class Iter>
-std::vector<size_t> argMagnitudeSort(Iter begin, Iter end) {
-  std::vector<size_t> indices(end - begin);
-  std::iota(indices.begin(), indices.end(), 0);
-  std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) {
-    return std::abs(begin[j]) < std::abs(begin[i]);
-  });
-  return indices;
-}
-
-inline bool readReferenceFile(const std::string& fileName,
-                              std::vector<std::string>& refVector) {
-  std::ifstream infile(fileName);
-  if (!infile.is_open()) {
-    std::cout << "ERROR: readReferenceFile: Attempting to read from a file "
-                 "that is not open."
-              << std::endl;
-    return false;
-  }
-  std::string line;
-  while (std::getline(infile, line)) {
-    if (line.empty())
-      continue;
-    refVector.push_back(line);
-  }
-  infile.close();
-  return true;
-}
-
-template <typename T>
-std::vector<std::string> classify(const std::vector<std::string>& refVector,
-                                  const std::vector<T>& output,
-                                  const size_t topK) {
-  const auto inds =
-      samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
-  std::vector<std::string> result;
-  result.reserve(topK);
-  for (size_t k = 0; k < topK; ++k) {
-    result.push_back(refVector[inds[k]]);
-  }
-  return result;
-}
-
-// Returns indices of highest K magnitudes in v.
-template <typename T>
-std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k) {
-  std::vector<size_t> indices =
-      samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
-  indices.resize(k);
-  return indices;
-}
-
-template <typename T>
-bool readASCIIFile(const std::string& fileName, const size_t size,
-                   std::vector<T>& out) {
-  std::ifstream infile(fileName);
-  if (!infile.is_open()) {
-    std::cout << "ERROR readASCIIFile: Attempting to read from a file that is "
-                 "not open."
-              << std::endl;
-    return false;
-  }
-  out.clear();
-  out.reserve(size);
-  out.assign(std::istream_iterator<T>(infile), std::istream_iterator<T>());
-  infile.close();
-  return true;
-}
-
-template <typename T>
-bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in) {
-  std::ofstream outfile(fileName);
-  if (!outfile.is_open()) {
-    std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is "
-                 "not open."
-              << std::endl;
-    return false;
-  }
-  for (auto fn : in) {
-    outfile << fn << "\n";
-  }
-  outfile.close();
-  return true;
-}
-
-inline void print_version() {
-  std::cout << "  TensorRT version: " << NV_TENSORRT_MAJOR << "."
-            << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "."
-            << NV_TENSORRT_BUILD << std::endl;
-}
-
-inline std::string getFileType(const std::string& filepath) {
-  return filepath.substr(filepath.find_last_of(".") + 1);
-}
-
-inline std::string toLower(const std::string& inp) {
-  std::string out = inp;
-  std::transform(out.begin(), out.end(), out.begin(), ::tolower);
-  return out;
-}
-
-inline float getMaxValue(const float* buffer, int64_t size) {
-  assert(buffer != nullptr);
-  assert(size > 0);
-  return *std::max_element(buffer, buffer + size);
-}
-
-// Ensures that every tensor used by a network has a dynamic range set.
-//
-// All tensors in a network must have a dynamic range specified if a calibrator
-// is not used.
-// This function is just a utility to globally fill in missing scales and
-// zero-points for the entire network.
-//
-// If a tensor does not have a dyanamic range set, it is assigned inRange or
-// outRange as follows:
-//
-// * If the tensor is the input to a layer or output of a pooling node, its
-// dynamic range is derived from inRange.
-// * Otherwise its dynamic range is derived from outRange.
-//
-// The default parameter values are intended to demonstrate, for final layers in
-// the network,
-// cases where dynamic ranges are asymmetric.
-//
-// The default parameter values choosen arbitrarily. Range values should be
-// choosen such that
-// we avoid underflow or overflow. Also range value should be non zero to avoid
-// uniform zero scale tensor.
-inline void setAllDynamicRanges(INetworkDefinition* network,
-                                float inRange = 2.0f, float outRange = 4.0f) {
-  // Ensure that all layer inputs have a scale.
-  for (int i = 0; i < network->getNbLayers(); i++) {
-    auto layer = network->getLayer(i);
-    for (int j = 0; j < layer->getNbInputs(); j++) {
-      ITensor* input{layer->getInput(j)};
-      // Optional inputs are nullptr here and are from RNN layers.
-      if (input != nullptr && !input->dynamicRangeIsSet()) {
-        ASSERT(input->setDynamicRange(-inRange, inRange));
-      }
-    }
-  }
-
-  // Ensure that all layer outputs have a scale.
-  // Tensors that are also inputs to layers are ingored here
-  // since the previous loop nest assigned scales to them.
-  for (int i = 0; i < network->getNbLayers(); i++) {
-    auto layer = network->getLayer(i);
-    for (int j = 0; j < layer->getNbOutputs(); j++) {
-      ITensor* output{layer->getOutput(j)};
-      // Optional outputs are nullptr here and are from RNN layers.
-      if (output != nullptr && !output->dynamicRangeIsSet()) {
-        // Pooling must have the same input and output scales.
-        if (layer->getType() == LayerType::kPOOLING) {
-          ASSERT(output->setDynamicRange(-inRange, inRange));
-        } else {
-          ASSERT(output->setDynamicRange(-outRange, outRange));
-        }
-      }
-    }
-  }
-}
-
-inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
-                                      INetworkDefinition* n) {
-  // Set dummy per-tensor dynamic range if Int8 mode is requested.
-  if (c->getFlag(BuilderFlag::kINT8)) {
-    sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
-                           "per-tensor dynamic range. Int8 accuracy is not "
-                           "guaranteed."
-                        << std::endl;
-    setAllDynamicRanges(n);
-  }
-}
-
-inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
-                      bool allowGPUFallback = true) {
-  if (useDLACore >= 0) {
-    if (builder->getNbDLACores() == 0) {
-      std::cerr << "Trying to use DLA core " << useDLACore
-                << " on a platform that doesn't have any DLA cores"
-                << std::endl;
-      assert(
-          "Error: use DLA core on a platfrom that doesn't have any DLA cores" &&
-          false);
-    }
-    if (allowGPUFallback) {
-      config->setFlag(BuilderFlag::kGPU_FALLBACK);
-    }
-    if (!config->getFlag(BuilderFlag::kINT8)) {
-      // User has not requested INT8 Mode.
-      // By default run in FP16 mode. FP32 mode is not permitted.
-      config->setFlag(BuilderFlag::kFP16);
-    }
-    config->setDefaultDeviceType(DeviceType::kDLA);
-    config->setDLACore(useDLACore);
-  }
-}
-
-inline int32_t parseDLA(int32_t argc, char** argv) {
-  for (int32_t i = 1; i < argc; i++) {
-    if (strncmp(argv[i], "--useDLACore=", 13) == 0) {
-      return std::stoi(argv[i] + 13);
-    }
-  }
-  return -1;
-}
-
-inline uint32_t getElementSize(nvinfer1::DataType t) noexcept {
-  switch (t) {
-  case nvinfer1::DataType::kINT32:
-    return 4;
-  case nvinfer1::DataType::kFLOAT:
-    return 4;
-  case nvinfer1::DataType::kHALF:
-    return 2;
-  case nvinfer1::DataType::kBOOL:
-  case nvinfer1::DataType::kINT8:
-    return 1;
-  }
-  return 0;
-}
-
-inline int64_t volume(const nvinfer1::Dims& d) {
-  return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
-}
-
-template <int C, int H, int W> struct PPM {
-  std::string magic, fileName;
-  int h, w, max;
-  uint8_t buffer[C * H * W];
-};
-
-// New vPPM(variable sized PPM) class with variable dimensions.
-struct vPPM {
-  std::string magic, fileName;
-  int h, w, max;
-  std::vector<uint8_t> buffer;
-};
-
-struct BBox {
-  float x1, y1, x2, y2;
-};
-
-template <int C, int H, int W>
-void readPPMFile(const std::string& filename,
-                 samplesCommon::PPM<C, H, W>& ppm) {
-  ppm.fileName = filename;
-  std::ifstream infile(filename, std::ifstream::binary);
-  assert(infile.is_open() &&
-         "Attempting to read from a file that is not open.");
-  infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
-  infile.seekg(1, infile.cur);
-  infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
-}
-
-inline void readPPMFile(const std::string& filename, vPPM& ppm,
-                        std::vector<std::string>& input_dir) {
-  ppm.fileName = filename;
-  std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
-  infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
-  infile.seekg(1, infile.cur);
-
-  for (int i = 0; i < ppm.w * ppm.h * 3; ++i) {
-    ppm.buffer.push_back(0);
-  }
-
-  infile.read(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
-}
-
-template <int C, int H, int W>
-void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm,
-                          const BBox& bbox) {
-  std::ofstream outfile("./" + filename, std::ofstream::binary);
-  assert(!outfile.fail());
-  outfile << "P6"
-          << "\n"
-          << ppm.w << " " << ppm.h << "\n"
-          << ppm.max << "\n";
-
-  auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
-  const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1);
-  const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1);
-  const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1);
-  const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1);
-
-  for (int x = x1; x <= x2; ++x) {
-    // bbox top border
-    ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
-    ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
-    ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
-    // bbox bottom border
-    ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
-    ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
-    ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
-  }
-
-  for (int y = y1; y <= y2; ++y) {
-    // bbox left border
-    ppm.buffer[(y * ppm.w + x1) * 3] = 255;
-    ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
-    ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
-    // bbox right border
-    ppm.buffer[(y * ppm.w + x2) * 3] = 255;
-    ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
-    ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
-  }
-
-  outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
-}
-
-inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm,
-                                 std::vector<BBox>& dets) {
-  std::ofstream outfile("./" + filename, std::ofstream::binary);
-  assert(!outfile.fail());
-  outfile << "P6"
-          << "\n"
-          << ppm.w << " " << ppm.h << "\n"
-          << ppm.max << "\n";
-  auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
-
-  for (auto bbox : dets) {
-    for (int x = int(bbox.x1); x < int(bbox.x2); ++x) {
-      // bbox top border
-      ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255;
-      ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0;
-      ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0;
-      // bbox bottom border
-      ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255;
-      ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0;
-      ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0;
-    }
-
-    for (int y = int(bbox.y1); y < int(bbox.y2); ++y) {
-      // bbox left border
-      ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255;
-      ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0;
-      ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0;
-      // bbox right border
-      ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255;
-      ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0;
-      ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0;
-    }
-  }
-
-  outfile.write(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
-}
-
-class TimerBase {
- public:
-  virtual void start() {}
-  virtual void stop() {}
-  float microseconds() const noexcept { return mMs * 1000.f; }
-  float milliseconds() const noexcept { return mMs; }
-  float seconds() const noexcept { return mMs / 1000.f; }
-  void reset() noexcept { mMs = 0.f; }
-
- protected:
-  float mMs{0.0f};
-};
-
-class GpuTimer : public TimerBase {
- public:
-  explicit GpuTimer(cudaStream_t stream) : mStream(stream) {
-    CHECK(cudaEventCreate(&mStart));
-    CHECK(cudaEventCreate(&mStop));
-  }
-  ~GpuTimer() {
-    CHECK(cudaEventDestroy(mStart));
-    CHECK(cudaEventDestroy(mStop));
-  }
-  void start() { CHECK(cudaEventRecord(mStart, mStream)); }
-  void stop() {
-    CHECK(cudaEventRecord(mStop, mStream));
-    float ms{0.0f};
-    CHECK(cudaEventSynchronize(mStop));
-    CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
-    mMs += ms;
-  }
-
- private:
-  cudaEvent_t mStart, mStop;
-  cudaStream_t mStream;
-}; // class GpuTimer
-
-template <typename Clock> class CpuTimer : public TimerBase {
- public:
-  using clock_type = Clock;
-
-  void start() { mStart = Clock::now(); }
-  void stop() {
-    mStop = Clock::now();
-    mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
-  }
-
- private:
-  std::chrono::time_point<Clock> mStart, mStop;
-}; // class CpuTimer
-
-using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
-
-inline std::vector<std::string> splitString(std::string str,
-                                            char delimiter = ',') {
-  std::vector<std::string> splitVect;
-  std::stringstream ss(str);
-  std::string substr;
-
-  while (ss.good()) {
-    getline(ss, substr, delimiter);
-    splitVect.emplace_back(std::move(substr));
-  }
-  return splitVect;
-}
-
-// Return m rounded up to nearest multiple of n
-inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; }
-
-inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; }
-
-inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; }
-
-inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; }
-
-inline void loadLibrary(const std::string& path) {
-#ifdef _MSC_VER
-  void* handle = LoadLibrary(path.c_str());
-#else
-  int32_t flags{RTLD_LAZY};
-#if ENABLE_ASAN
-  // https://github.com/google/sanitizers/issues/89
-  // asan doesn't handle module unloading correctly and there are no plans on
-  // doing
-  // so. In order to get proper stack traces, don't delete the shared library on
-  // close so that asan can resolve the symbols correctly.
-  flags |= RTLD_NODELETE;
-#endif // ENABLE_ASAN
-
-  void* handle = dlopen(path.c_str(), flags);
-#endif
-  if (handle == nullptr) {
-#ifdef _MSC_VER
-    sample::gLogError << "Could not load plugin library: " << path << std::endl;
-#else
-    sample::gLogError << "Could not load plugin library: " << path
-                      << ", due to: " << dlerror() << std::endl;
-#endif
-  }
-}
-
-inline int32_t getSMVersion() {
-  int32_t deviceIndex = 0;
-  CHECK(cudaGetDevice(&deviceIndex));
-
-  int32_t major, minor;
-  CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
-                               deviceIndex));
-  CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
-                               deviceIndex));
-
-  return ((major << 8) | minor);
-}
-
-inline bool isSMSafe() {
-  const int32_t smVersion = getSMVersion();
-  return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 ||
-         smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807;
-}
-
-inline bool isDataTypeSupported(DataType dataType) {
-  auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
-      nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
-  if (!builder) {
-    return false;
-  }
-
-  if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) ||
-      (dataType == DataType::kHALF && !builder->platformHasFastFp16())) {
-    return false;
-  }
-
-  return true;
-}
-
-} // namespace samplesCommon
-
-inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
-  os << "(";
-  for (int i = 0; i < dims.nbDims; ++i) {
-    os << (i ? ", " : "") << dims.d[i];
-  }
-  return os << ")";
-}
-
-#endif // TENSORRT_COMMON_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp b/csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp
deleted file mode 100644
index 84b06581a..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "getOptions.h"
-#include "logger.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cctype>
-#include <cstring>
-#include <set>
-
-namespace nvinfer1 {
-namespace utility {
-
-//! Matching for TRTOptions is defined as follows:
-//!
-//! If A and B both have longName set, A matches B if and only if A.longName ==
-//! B.longName and (A.shortName == B.shortName if both have short name set).
-//!
-//! If A only has shortName set and B only has longName set, then A does not
-//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
-//! the definition of a TRTOption in the input to getOptions. As such, if the
-//! definition only has shortName set, it will never be equal to a TRTOption
-//! that does not have shortName set (and same for longName).
-//!
-//! If A and B both have shortName set but B does not have longName set, A
-//! matches B if and only if A.shortName == B.shortName.
-//!
-//! If A has neither long or short name set, A matches B if and only if B has
-//! neither long or short name set.
-bool matches(const TRTOption& a, const TRTOption& b) {
-  if (!a.longName.empty() && !b.longName.empty()) {
-    if (a.shortName && b.shortName) {
-      return (a.longName == b.longName) && (a.shortName == b.shortName);
-    }
-    return a.longName == b.longName;
-  }
-
-  // If only one of them is not set, this will return false anyway.
-  return a.shortName == b.shortName;
-}
-
-//! getTRTOptionIndex returns the index of a TRTOption in a vector of
-//! TRTOptions, -1 if not found.
-int getTRTOptionIndex(const std::vector<TRTOption>& options,
-                      const TRTOption& opt) {
-  for (size_t i = 0; i < options.size(); ++i) {
-    if (matches(opt, options[i])) {
-      return i;
-    }
-  }
-  return -1;
-}
-
-//! validateTRTOption will return a string containing an error message if
-//! options
-//! contain non-numeric characters, or if there are duplicate option names
-//! found.
-//! Otherwise, returns the empty string.
-std::string validateTRTOption(const std::set<char>& seenShortNames,
-                              const std::set<std::string>& seenLongNames,
-                              const TRTOption& opt) {
-  if (opt.shortName != 0) {
-    if (!std::isalnum(opt.shortName)) {
-      return "Short name '" + std::to_string(opt.shortName) +
-             "' is non-alphanumeric";
-    }
-
-    if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
-      return "Short name '" + std::to_string(opt.shortName) +
-             "' is a duplicate";
-    }
-  }
-
-  if (!opt.longName.empty()) {
-    for (const char& c : opt.longName) {
-      if (!std::isalnum(c) && c != '-' && c != '_') {
-        return "Long name '" + opt.longName +
-               "' contains characters that are not '-', '_', or alphanumeric";
-      }
-    }
-
-    if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
-      return "Long name '" + opt.longName + "' is a duplicate";
-    }
-  }
-  return "";
-}
-
-//! validateTRTOptions will return a string containing an error message if any
-//! options contain non-numeric characters, or if there are duplicate option
-//! names found. Otherwise, returns the empty string.
-std::string validateTRTOptions(const std::vector<TRTOption>& options) {
-  std::set<char> seenShortNames;
-  std::set<std::string> seenLongNames;
-  for (size_t i = 0; i < options.size(); ++i) {
-    const std::string errMsg =
-        validateTRTOption(seenShortNames, seenLongNames, options[i]);
-    if (!errMsg.empty()) {
-      return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
-    }
-
-    seenShortNames.insert(options[i].shortName);
-    seenLongNames.insert(options[i].longName);
-  }
-  return "";
-}
-
-//! parseArgs parses an argument list and returns a TRTParsedArgs with the
-//! fields set accordingly. Assumes that options is validated.
-//! ErrMsg will be set if:
-//!     - an argument is null
-//!     - an argument is empty
-//!     - an argument does not have option (i.e. "-" and "--")
-//!     - a short argument has more than 1 character
-//!     - the last argument in the list requires a value
-TRTParsedArgs parseArgs(int argc, const char* const* argv,
-                        const std::vector<TRTOption>& options) {
-  TRTParsedArgs parsedArgs;
-  parsedArgs.values.resize(options.size());
-
-  for (int i = 1; i < argc; ++i) // index of current command-line argument
-  {
-    if (argv[i] == nullptr) {
-      return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
-    }
-
-    const std::string argStr(argv[i]);
-    if (argStr.empty()) {
-      return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
-    }
-
-    // No starting hyphen means it is a positional argument
-    if (argStr[0] != '-') {
-      parsedArgs.positionalArgs.push_back(argStr);
-      continue;
-    }
-
-    if (argStr == "-" || argStr == "--") {
-      return TRTParsedArgs{"Argument does not specify an option at index " +
-                           std::to_string(i)};
-    }
-
-    // If only 1 hyphen, char after is the flag.
-    TRTOption opt{' ', "", false, ""};
-    std::string value;
-    if (argStr[1] != '-') {
-      // Must only have 1 char after the hyphen
-      if (argStr.size() > 2) {
-        return TRTParsedArgs{
-            "Short arg contains more than 1 character at index " +
-            std::to_string(i)};
-      }
-      opt.shortName = argStr[1];
-    } else {
-      opt.longName = argStr.substr(2);
-
-      // We need to support --foo=bar syntax, so look for '='
-      const size_t eqIndex = opt.longName.find('=');
-      if (eqIndex < opt.longName.size()) {
-        value = opt.longName.substr(eqIndex + 1);
-        opt.longName = opt.longName.substr(0, eqIndex);
-      }
-    }
-
-    const int idx = getTRTOptionIndex(options, opt);
-    if (idx < 0) {
-      continue;
-    }
-
-    if (options[idx].valueRequired) {
-      if (!value.empty()) {
-        parsedArgs.values[idx].second.push_back(value);
-        parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
-        continue;
-      }
-
-      if (i + 1 >= argc) {
-        return TRTParsedArgs{"Last argument requires value, but none given"};
-      }
-
-      const std::string nextArg(argv[i + 1]);
-      if (nextArg.size() >= 1 && nextArg[0] == '-') {
-        sample::gLogWarning << "Warning: Using '" << nextArg
-                            << "' as a value for '" << argStr
-                            << "', Should this be its own flag?" << std::endl;
-      }
-
-      parsedArgs.values[idx].second.push_back(nextArg);
-      i += 1; // Next argument already consumed
-
-      parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
-    } else {
-      parsedArgs.values[idx].first += 1;
-    }
-  }
-  return parsedArgs;
-}
-
-TRTParsedArgs getOptions(int argc, const char* const* argv,
-                         const std::vector<TRTOption>& options) {
-  const std::string errMsg = validateTRTOptions(options);
-  if (!errMsg.empty()) {
-    return TRTParsedArgs{errMsg};
-  }
-  return parseArgs(argc, argv, options);
-}
-} // namespace utility
-} // namespace nvinfer1
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.h b/csrcs/fastdeploy/backends/tensorrt/common/getOptions.h
deleted file mode 100644
index efe466632..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TRT_GET_OPTIONS_H
-#define TRT_GET_OPTIONS_H
-
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace nvinfer1 {
-namespace utility {
-
-//! TRTOption defines a command line option. At least 1 of shortName and
-//! longName
-//! must be defined.
-//! If bool initialization is undefined behavior on your system, valueRequired
-//! must also be explicitly defined.
-//! helpText is optional.
-struct TRTOption {
-  char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b)
-  std::string longName; //!< Option name in long (double hyphen) form (i.e.
-                        //!--foo, --bar)
-  bool valueRequired;   //!< True if a value is needed for an option (i.e. -N 4,
-                        //!--foo bar)
-  std::string helpText; //!< Text to show when printing out the command usage
-};
-
-//! TRTParsedArgs is returned by getOptions after it has parsed a command line
-//! argument list (argv).
-//!
-//! errMsg is a string containing an error message if any errors occurred. If it
-//! is empty, no errors occurred.
-//!
-//! values stores a vector of pairs for each option (ordered by order in the
-//! input). Each pair contains an int (the number of occurrences) and a vector
-//! of strings (a list of values). The user should know which of these to use,
-//! and which options required values. For non-value options, only occurrences
-//! is
-//! populated. For value-required options, occurrences == # of values. Values do
-//! not need to be unique.
-//!
-//! positionalArgs stores additional arguments that are passed in without an
-//! option (these must not start with a hyphen).
-struct TRTParsedArgs {
-  std::string errMsg;
-  std::vector<std::pair<int, std::vector<std::string>>> values;
-  std::vector<std::string> positionalArgs;
-};
-
-//! Parse the input arguments passed to main() and extract options as well as
-//! positional arguments.
-//!
-//! Options are supposed to be passed to main() with a preceding hyphen '-'.
-//!
-//! If there is a single preceding hyphen, there should be exactly 1 character
-//! after the hyphen, which is interpreted as the option.
-//!
-//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
-//! is interpreted as the option.
-//!
-//! If the option requires a value, the next argument is used as the value.
-//!
-//! Positional arguments must not start with a hyphen.
-//!
-//! If an argument requires a value, the next argument is interpreted as the
-//! value, even if it is the form of a valid option (i.e. --foo --bar will store
-//! "--bar" as a value for option "foo" if "foo" requires a value).
-//! We also support --name=value syntax. In this case, 'value' would be used as
-//! the value, NOT the next argument.
-//!
-//! For options:
-//!   { { 'a', "", false },
-//!     { 'b', "", false },
-//!     { 0, "cee", false },
-//!     { 'd', "", true },
-//!     { 'e', "", true },
-//!     { 'f', "foo", true } }
-//!
-//! ./main hello world -a -a --cee -d 12 -f 34
-//! and
-//! ./main hello world -a -a --cee -d 12 --foo 34
-//!
-//! will result in:
-//!
-//! TRTParsedArgs {
-//!      errMsg: "",
-//!      values: { { 2, {} },
-//!                { 0, {} },
-//!                { 1, {} },
-//!                { 1, {"12"} },
-//!                { 0, {} },
-//!                { 1, {"34"} } }
-//!      positionalArgs: {"hello", "world"},
-//! }
-//!
-//! Non-POSIX behavior:
-//!      - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
-//!        option must have its own hyphen prefix.
-//!      - Does not support -e12 as a shorthand for "-e 12". Values MUST be
-//!        whitespace-separated from the option it is for.
-//!
-//! @param[in] argc The number of arguments passed to main (including the
-//!            file name, which is disregarded)
-//! @param[in] argv The arguments passed to main (including the file name,
-//!            which is disregarded)
-//! @param[in] options List of TRTOptions to parse
-//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
-//!         the fields.
-TRTParsedArgs getOptions(int argc, const char* const* argv,
-                         const std::vector<TRTOption>& options);
-} // namespace utility
-} // namespace nvinfer1
-
-#endif // TRT_GET_OPTIONS_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/half.h b/csrcs/fastdeploy/backends/tensorrt/common/half.h
deleted file mode 100644
index 5ca797000..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/half.h
+++ /dev/null
@@ -1,3787 +0,0 @@
-// half - IEEE 754-based half-precision floating point library.
-//
-// Copyright (c) 2012-2017 Christian Rau <rauy@users.sourceforge.net>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated
-// documentation files (the "Software"), to deal in the Software without
-// restriction, including without limitation the
-// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-// sell copies of the Software, and to
-// permit persons to whom the Software is furnished to do so, subject to the
-// following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the
-// Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR
-// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
-// OR OTHER DEALINGS IN THE SOFTWARE.
-
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Version 1.12.0
-
-/// \file
-/// Main header file for half precision functionality.
-
-#ifndef HALF_HALF_HPP
-#define HALF_HALF_HPP
-
-/// Combined gcc version number.
-#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-
-// check C++11 language features
-#if defined(__clang__) // clang
-#if __has_feature(cxx_static_assert) &&                                        \
-    !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
-#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
-#endif
-#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
-#define HALF_ENABLE_CPP11_CONSTEXPR 1
-#endif
-#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
-#define HALF_ENABLE_CPP11_NOEXCEPT 1
-#endif
-#if __has_feature(cxx_user_literals) &&                                        \
-    !defined(HALF_ENABLE_CPP11_USER_LITERALS)
-#define HALF_ENABLE_CPP11_USER_LITERALS 1
-#endif
-#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) &&         \
-    !defined(HALF_ENABLE_CPP11_LONG_LONG)
-#define HALF_ENABLE_CPP11_LONG_LONG 1
-#endif
-/*#elif defined(__INTEL_COMPILER)
-   //Intel C++
-    #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
-   ????????
-        #define HALF_ENABLE_CPP11_STATIC_ASSERT 1
-    #endif
-    #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
-   ????????
-        #define HALF_ENABLE_CPP11_CONSTEXPR 1
-    #endif
-    #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
-   ????????
-        #define HALF_ENABLE_CPP11_NOEXCEPT 1
-    #endif
-    #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG)
-   ????????
-        #define HALF_ENABLE_CPP11_LONG_LONG 1
-    #endif*/
-#elif defined(__GNUC__) // gcc
-#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L
-#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
-#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
-#endif
-#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
-#define HALF_ENABLE_CPP11_CONSTEXPR 1
-#endif
-#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
-#define HALF_ENABLE_CPP11_NOEXCEPT 1
-#endif
-#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
-#define HALF_ENABLE_CPP11_USER_LITERALS 1
-#endif
-#if !defined(HALF_ENABLE_CPP11_LONG_LONG)
-#define HALF_ENABLE_CPP11_LONG_LONG 1
-#endif
-#endif
-#elif defined(_MSC_VER) // Visual C++
-#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
-#define HALF_ENABLE_CPP11_CONSTEXPR 1
-#endif
-#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
-#define HALF_ENABLE_CPP11_NOEXCEPT 1
-#endif
-#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
-#define HALF_ENABLE_CPP11_USER_LITERALS 1
-#endif
-#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
-#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
-#endif
-#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG)
-#define HALF_ENABLE_CPP11_LONG_LONG 1
-#endif
-#define HALF_POP_WARNINGS 1
-#pragma warning(push)
-#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if,
-// negative unsigned
-#endif
-
-// check C++11 library features
-#include <utility>
-#if defined(_LIBCPP_VERSION) // libc++
-#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
-#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS
-#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
-#endif
-#ifndef HALF_ENABLE_CPP11_CSTDINT
-#define HALF_ENABLE_CPP11_CSTDINT 1
-#endif
-#ifndef HALF_ENABLE_CPP11_CMATH
-#define HALF_ENABLE_CPP11_CMATH 1
-#endif
-#ifndef HALF_ENABLE_CPP11_HASH
-#define HALF_ENABLE_CPP11_HASH 1
-#endif
-#endif
-#elif defined(__GLIBCXX__) // libstdc++
-#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
-#ifdef __clang__
-#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS)
-#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
-#endif
-#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT)
-#define HALF_ENABLE_CPP11_CSTDINT 1
-#endif
-#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH)
-#define HALF_ENABLE_CPP11_CMATH 1
-#endif
-#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH)
-#define HALF_ENABLE_CPP11_HASH 1
-#endif
-#else
-#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT)
-#define HALF_ENABLE_CPP11_CSTDINT 1
-#endif
-#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH)
-#define HALF_ENABLE_CPP11_CMATH 1
-#endif
-#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH)
-#define HALF_ENABLE_CPP11_HASH 1
-#endif
-#endif
-#endif
-#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++
-#if _CPPLIB_VER >= 520
-#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS
-#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
-#endif
-#ifndef HALF_ENABLE_CPP11_CSTDINT
-#define HALF_ENABLE_CPP11_CSTDINT 1
-#endif
-#ifndef HALF_ENABLE_CPP11_HASH
-#define HALF_ENABLE_CPP11_HASH 1
-#endif
-#endif
-#if _CPPLIB_VER >= 610
-#ifndef HALF_ENABLE_CPP11_CMATH
-#define HALF_ENABLE_CPP11_CMATH 1
-#endif
-#endif
-#endif
-#undef HALF_GNUC_VERSION
-
-// support constexpr
-#if HALF_ENABLE_CPP11_CONSTEXPR
-#define HALF_CONSTEXPR constexpr
-#define HALF_CONSTEXPR_CONST constexpr
-#else
-#define HALF_CONSTEXPR
-#define HALF_CONSTEXPR_CONST const
-#endif
-
-// support noexcept
-#if HALF_ENABLE_CPP11_NOEXCEPT
-#define HALF_NOEXCEPT noexcept
-#define HALF_NOTHROW noexcept
-#else
-#define HALF_NOEXCEPT
-#define HALF_NOTHROW throw()
-#endif
-
-#include <algorithm>
-#include <climits>
-#include <cmath>
-#include <cstring>
-#include <iostream>
-#include <limits>
-#if HALF_ENABLE_CPP11_TYPE_TRAITS
-#include <type_traits>
-#endif
-#if HALF_ENABLE_CPP11_CSTDINT
-#include <cstdint>
-#endif
-#if HALF_ENABLE_CPP11_HASH
-#include <functional>
-#endif
-
-/// Default rounding mode.
-/// This specifies the rounding mode used for all conversions between
-/// [half](\ref half_float::half)s and `float`s as
-/// well as for the half_cast() if not specifying a rounding mode explicitly. It
-/// can be redefined (before including
-/// half.hpp) to one of the standard rounding modes using their respective
-/// constants or the equivalent values of
-/// `std::float_round_style`:
-///
-/// `std::float_round_style`         | value | rounding
-/// ---------------------------------|-------|-------------------------
-/// `std::round_indeterminate`       | -1    | fastest (default)
-/// `std::round_toward_zero`         | 0     | toward zero
-/// `std::round_to_nearest`          | 1     | to nearest
-/// `std::round_toward_infinity`     | 2     | toward positive infinity
-/// `std::round_toward_neg_infinity` | 3     | toward negative infinity
-///
-/// By default this is set to `-1` (`std::round_indeterminate`), which uses
-/// truncation (round toward zero, but with
-/// overflows set to infinity) and is the fastest rounding mode possible. It can
-/// even be set to
-/// `std::numeric_limits<float>::round_style` to synchronize the rounding mode
-/// with that of the underlying
-/// single-precision implementation.
-#ifndef HALF_ROUND_STYLE
-#define HALF_ROUND_STYLE 1 // = std::round_to_nearest
-#endif
-
-/// Tie-breaking behaviour for round to nearest.
-/// This specifies if ties in round to nearest should be resolved by rounding to
-/// the nearest even value. By default this
-/// is defined to `0` resulting in the faster but slightly more biased behaviour
-/// of rounding away from zero in half-way
-/// cases (and thus equal to the round() function), but can be redefined to `1`
-/// (before including half.hpp) if more
-/// IEEE-conformant behaviour is needed.
-#ifndef HALF_ROUND_TIES_TO_EVEN
-#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero
-#endif
-
-/// Value signaling overflow.
-/// In correspondence with `HUGE_VAL[F|L]` from `<cmath>` this symbol expands to
-/// a positive value signaling the overflow
-/// of an operation, in particular it just evaluates to positive infinity.
-#define HUGE_VALH std::numeric_limits<half_float::half>::infinity()
-
-/// Fast half-precision fma function.
-/// This symbol is only defined if the fma() function generally executes as fast
-/// as, or faster than, a separate
-/// half-precision multiplication followed by an addition. Due to the internal
-/// single-precision implementation of all
-/// arithmetic operations, this is in fact always the case.
-#define FP_FAST_FMAH 1
-
-#ifndef FP_ILOGB0
-#define FP_ILOGB0 INT_MIN
-#endif
-#ifndef FP_ILOGBNAN
-#define FP_ILOGBNAN INT_MAX
-#endif
-#ifndef FP_SUBNORMAL
-#define FP_SUBNORMAL 0
-#endif
-#ifndef FP_ZERO
-#define FP_ZERO 1
-#endif
-#ifndef FP_NAN
-#define FP_NAN 2
-#endif
-#ifndef FP_INFINITE
-#define FP_INFINITE 3
-#endif
-#ifndef FP_NORMAL
-#define FP_NORMAL 4
-#endif
-
-/// Main namespace for half precision functionality.
-/// This namespace contains all the functionality provided by the library.
-namespace half_float {
-class half;
-
-#if HALF_ENABLE_CPP11_USER_LITERALS
-/// Library-defined half-precision literals.
-/// Import this namespace to enable half-precision floating point literals:
-/// ~~~~{.cpp}
-/// using namespace half_float::literal;
-/// half_float::half = 4.2_h;
-/// ~~~~
-namespace literal {
-half operator"" _h(long double);
-}
-#endif
-
-/// \internal
-/// \brief Implementation details.
-namespace detail {
-#if HALF_ENABLE_CPP11_TYPE_TRAITS
-/// Conditional type.
-template <bool B, typename T, typename F>
-struct conditional : std::conditional<B, T, F> {};
-
-/// Helper for tag dispatching.
-template <bool B> struct bool_type : std::integral_constant<bool, B> {};
-using std::false_type;
-using std::true_type;
-
-/// Type traits for floating point types.
-template <typename T> struct is_float : std::is_floating_point<T> {};
-#else
-/// Conditional type.
-template <bool, typename T, typename> struct conditional { typedef T type; };
-template <typename T, typename F> struct conditional<false, T, F> {
-  typedef F type;
-};
-
-/// Helper for tag dispatching.
-template <bool> struct bool_type {};
-typedef bool_type<true> true_type;
-typedef bool_type<false> false_type;
-
-/// Type traits for floating point types.
-template <typename> struct is_float : false_type {};
-template <typename T> struct is_float<const T> : is_float<T> {};
-template <typename T> struct is_float<volatile T> : is_float<T> {};
-template <typename T> struct is_float<const volatile T> : is_float<T> {};
-template <> struct is_float<float> : true_type {};
-template <> struct is_float<double> : true_type {};
-template <> struct is_float<long double> : true_type {};
-#endif
-
-/// Type traits for floating point bits.
-template <typename T> struct bits { typedef unsigned char type; };
-template <typename T> struct bits<const T> : bits<T> {};
-template <typename T> struct bits<volatile T> : bits<T> {};
-template <typename T> struct bits<const volatile T> : bits<T> {};
-
-#if HALF_ENABLE_CPP11_CSTDINT
-/// Unsigned integer of (at least) 16 bits width.
-typedef std::uint_least16_t uint16;
-
-/// Unsigned integer of (at least) 32 bits width.
-template <> struct bits<float> { typedef std::uint_least32_t type; };
-
-/// Unsigned integer of (at least) 64 bits width.
-template <> struct bits<double> { typedef std::uint_least64_t type; };
-#else
-/// Unsigned integer of (at least) 16 bits width.
-typedef unsigned short uint16;
-
-/// Unsigned integer of (at least) 32 bits width.
-template <>
-struct bits<float>
-    : conditional<std::numeric_limits<unsigned int>::digits >= 32, unsigned int,
-                  unsigned long> {};
-
-#if HALF_ENABLE_CPP11_LONG_LONG
-/// Unsigned integer of (at least) 64 bits width.
-template <>
-struct bits<double>
-    : conditional<std::numeric_limits<unsigned long>::digits >= 64,
-                  unsigned long, unsigned long long> {};
-#else
-/// Unsigned integer of (at least) 64 bits width.
-template <> struct bits<double> { typedef unsigned long type; };
-#endif
-#endif
-
-/// Tag type for binary construction.
-struct binary_t {};
-
-/// Tag for binary construction.
-HALF_CONSTEXPR_CONST binary_t binary = binary_t();
-
-/// Temporary half-precision expression.
-/// This class represents a half-precision expression which just stores a
-/// single-precision value internally.
-struct expr {
-  /// Conversion constructor.
-  /// \param f single-precision value to convert
-  explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {}
-
-  /// Conversion to single-precision.
-  /// \return single precision value representing expression value
-  HALF_CONSTEXPR operator float() const HALF_NOEXCEPT { return value_; }
-
- private:
-  /// Internal expression value stored in single-precision.
-  float value_;
-};
-
-/// SFINAE helper for generic half-precision functions.
-/// This class template has to be specialized for each valid combination of
-/// argument types to provide a corresponding
-/// `type` member equivalent to \a T.
-/// \tparam T type to return
-template <typename T, typename, typename = void, typename = void>
-struct enable {};
-template <typename T> struct enable<T, half, void, void> { typedef T type; };
-template <typename T> struct enable<T, expr, void, void> { typedef T type; };
-template <typename T> struct enable<T, half, half, void> { typedef T type; };
-template <typename T> struct enable<T, half, expr, void> { typedef T type; };
-template <typename T> struct enable<T, expr, half, void> { typedef T type; };
-template <typename T> struct enable<T, expr, expr, void> { typedef T type; };
-template <typename T> struct enable<T, half, half, half> { typedef T type; };
-template <typename T> struct enable<T, half, half, expr> { typedef T type; };
-template <typename T> struct enable<T, half, expr, half> { typedef T type; };
-template <typename T> struct enable<T, half, expr, expr> { typedef T type; };
-template <typename T> struct enable<T, expr, half, half> { typedef T type; };
-template <typename T> struct enable<T, expr, half, expr> { typedef T type; };
-template <typename T> struct enable<T, expr, expr, half> { typedef T type; };
-template <typename T> struct enable<T, expr, expr, expr> { typedef T type; };
-
-/// Return type for specialized generic 2-argument half-precision functions.
-/// This class template has to be specialized for each valid combination of
-/// argument types to provide a corresponding
-/// `type` member denoting the appropriate return type.
-/// \tparam T first argument type
-/// \tparam U first argument type
-template <typename T, typename U> struct result : enable<expr, T, U> {};
-template <> struct result<half, half> { typedef half type; };
-
-/// \name Classification helpers
-/// \{
-
-/// Check for infinity.
-/// \tparam T argument type (builtin floating point type)
-/// \param arg value to query
-/// \retval true if infinity
-/// \retval false else
-template <typename T> bool builtin_isinf(T arg) {
-#if HALF_ENABLE_CPP11_CMATH
-  return std::isinf(arg);
-#elif defined(_MSC_VER)
-  return !::_finite(static_cast<double>(arg)) &&
-         !::_isnan(static_cast<double>(arg));
-#else
-  return arg == std::numeric_limits<T>::infinity() ||
-         arg == -std::numeric_limits<T>::infinity();
-#endif
-}
-
-/// Check for NaN.
-/// \tparam T argument type (builtin floating point type)
-/// \param arg value to query
-/// \retval true if not a number
-/// \retval false else
-template <typename T> bool builtin_isnan(T arg) {
-#if HALF_ENABLE_CPP11_CMATH
-  return std::isnan(arg);
-#elif defined(_MSC_VER)
-  return ::_isnan(static_cast<double>(arg)) != 0;
-#else
-  return arg != arg;
-#endif
-}
-
-/// Check sign.
-/// \tparam T argument type (builtin floating point type)
-/// \param arg value to query
-/// \retval true if signbit set
-/// \retval false else
-template <typename T> bool builtin_signbit(T arg) {
-#if HALF_ENABLE_CPP11_CMATH
-  return std::signbit(arg);
-#else
-  return arg < T() || (arg == T() && T(1) / arg < T());
-#endif
-}
-
-/// \}
-/// \name Conversion
-/// \{
-
-/// Convert IEEE single-precision to half-precision.
-/// Credit for this goes to [Jeroen van der
-/// Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \param value single-precision value
-/// \return binary representation of half-precision value
-template <std::float_round_style R>
-uint16 float2half_impl(float value, true_type) {
-  typedef bits<float>::type uint32;
-  uint32 bits; // = *reinterpret_cast<uint32*>(&value);
-               // //violating
-               // strict aliasing!
-  std::memcpy(&bits, &value, sizeof(float));
-  /*			uint16 hbits = (bits>>16) & 0x8000;
-              bits &= 0x7FFFFFFF;
-              int exp = bits >> 23;
-              if(exp == 255)
-                  return hbits | 0x7C00 |
-     (0x3FF&-static_cast<unsigned>((bits&0x7FFFFF)!=0));
-              if(exp > 142)
-              {
-                  if(R == std::round_toward_infinity)
-                      return hbits | 0x7C00 - (hbits>>15);
-                  if(R == std::round_toward_neg_infinity)
-                      return hbits | 0x7BFF + (hbits>>15);
-                  return hbits | 0x7BFF + (R!=std::round_toward_zero);
-              }
-              int g, s;
-              if(exp > 112)
-              {
-                  g = (bits>>12) & 1;
-                  s = (bits&0xFFF) != 0;
-                  hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF);
-              }
-              else if(exp > 101)
-              {
-                  int i = 125 - exp;
-                  bits = (bits&0x7FFFFF) | 0x800000;
-                  g = (bits>>i) & 1;
-                  s = (bits&((1L<<i)-1)) != 0;
-                  hbits |= bits >> (i+1);
-              }
-              else
-              {
-                  g = 0;
-                  s = bits != 0;
-              }
-              if(R == std::round_to_nearest)
-                  #if HALF_ROUND_TIES_TO_EVEN
-                      hbits += g & (s|hbits);
-                  #else
-                      hbits += g;
-                  #endif
-              else if(R == std::round_toward_infinity)
-                  hbits += ~(hbits>>15) & (s|g);
-              else if(R == std::round_toward_neg_infinity)
-                  hbits += (hbits>>15) & (g|s);
-  */
-  static const uint16 base_table[512] = {
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-      0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010,
-      0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000,
-      0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400,
-      0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800,
-      0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
-      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
-      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
-      0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200,
-      0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400,
-      0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800,
-      0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00,
-      0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
-      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00};
-  static const unsigned char shift_table[512] = {
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19,
-      18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23,
-      22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-      13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-      24, 24, 24, 24, 24, 24, 24, 13};
-  uint16 hbits =
-      base_table[bits >> 23] +
-      static_cast<uint16>((bits & 0x7FFFFF) >> shift_table[bits >> 23]);
-  if (R == std::round_to_nearest)
-    hbits +=
-        (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) |
-         (((bits >> 23) & 0xFF) == 102)) &
-        ((hbits & 0x7C00) != 0x7C00)
-#if HALF_ROUND_TIES_TO_EVEN
-        & (((((static_cast<uint32>(1) << (shift_table[bits >> 23] - 1)) - 1) &
-             bits) != 0) |
-           hbits)
-#endif
-        ;
-  else if (R == std::round_toward_zero)
-    hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23];
-  else if (R == std::round_toward_infinity)
-    hbits +=
-        ((((bits & 0x7FFFFF &
-            ((static_cast<uint32>(1) << (shift_table[bits >> 23])) - 1)) != 0) |
-          (((bits >> 23) <= 102) & ((bits >> 23) != 0))) &
-         (hbits < 0x7C00)) -
-        ((hbits == 0xFC00) & ((bits >> 23) != 511));
-  else if (R == std::round_toward_neg_infinity)
-    hbits +=
-        ((((bits & 0x7FFFFF &
-            ((static_cast<uint32>(1) << (shift_table[bits >> 23])) - 1)) != 0) |
-          (((bits >> 23) <= 358) & ((bits >> 23) != 256))) &
-         (hbits < 0xFC00) & (hbits >> 15)) -
-        ((hbits == 0x7C00) & ((bits >> 23) != 255));
-  return hbits;
-}
-
-/// Convert IEEE double-precision to half-precision.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \param value double-precision value
-/// \return binary representation of half-precision value
-template <std::float_round_style R>
-uint16 float2half_impl(double value, true_type) {
-  typedef bits<float>::type uint32;
-  typedef bits<double>::type uint64;
-  uint64 bits; // = *reinterpret_cast<uint64*>(&value);
-               // //violating
-               // strict aliasing!
-  std::memcpy(&bits, &value, sizeof(double));
-  uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF;
-  uint16 hbits = (hi >> 16) & 0x8000;
-  hi &= 0x7FFFFFFF;
-  int exp = hi >> 20;
-  if (exp == 2047)
-    return hbits | 0x7C00 |
-           (0x3FF & -static_cast<unsigned>((bits & 0xFFFFFFFFFFFFF) != 0));
-  if (exp > 1038) {
-    if (R == std::round_toward_infinity)
-      return hbits | 0x7C00 - (hbits >> 15);
-    if (R == std::round_toward_neg_infinity)
-      return hbits | 0x7BFF + (hbits >> 15);
-    return hbits | 0x7BFF + (R != std::round_toward_zero);
-  }
-  int g, s = lo != 0;
-  if (exp > 1008) {
-    g = (hi >> 9) & 1;
-    s |= (hi & 0x1FF) != 0;
-    hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF);
-  } else if (exp > 997) {
-    int i = 1018 - exp;
-    hi = (hi & 0xFFFFF) | 0x100000;
-    g = (hi >> i) & 1;
-    s |= (hi & ((1L << i) - 1)) != 0;
-    hbits |= hi >> (i + 1);
-  } else {
-    g = 0;
-    s |= hi != 0;
-  }
-  if (R == std::round_to_nearest)
-#if HALF_ROUND_TIES_TO_EVEN
-    hbits += g & (s | hbits);
-#else
-    hbits += g;
-#endif
-  else if (R == std::round_toward_infinity)
-    hbits += ~(hbits >> 15) & (s | g);
-  else if (R == std::round_toward_neg_infinity)
-    hbits += (hbits >> 15) & (g | s);
-  return hbits;
-}
-
-/// Convert non-IEEE floating point to half-precision.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \tparam T source type (builtin floating point type)
-/// \param value floating point value
-/// \return binary representation of half-precision value
-template <std::float_round_style R, typename T>
-uint16 float2half_impl(T value, ...) {
-  uint16 hbits = static_cast<unsigned>(builtin_signbit(value)) << 15;
-  if (value == T())
-    return hbits;
-  if (builtin_isnan(value))
-    return hbits | 0x7FFF;
-  if (builtin_isinf(value))
-    return hbits | 0x7C00;
-  int exp;
-  std::frexp(value, &exp);
-  if (exp > 16) {
-    if (R == std::round_toward_infinity)
-      return hbits | (0x7C00 - (hbits >> 15));
-    else if (R == std::round_toward_neg_infinity)
-      return hbits | (0x7BFF + (hbits >> 15));
-    return hbits | (0x7BFF + (R != std::round_toward_zero));
-  }
-  if (exp < -13)
-    value = std::ldexp(value, 24);
-  else {
-    value = std::ldexp(value, 11 - exp);
-    hbits |= ((exp + 13) << 10);
-  }
-  T ival, frac = std::modf(value, &ival);
-  hbits += static_cast<uint16>(std::abs(static_cast<int>(ival)));
-  if (R == std::round_to_nearest) {
-    frac = std::abs(frac);
-#if HALF_ROUND_TIES_TO_EVEN
-    hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits);
-#else
-    hbits += frac >= T(0.5);
-#endif
-  } else if (R == std::round_toward_infinity)
-    hbits += frac > T();
-  else if (R == std::round_toward_neg_infinity)
-    hbits += frac < T();
-  return hbits;
-}
-
-/// Convert floating point to half-precision.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \tparam T source type (builtin floating point type)
-/// \param value floating point value
-/// \return binary representation of half-precision value
-template <std::float_round_style R, typename T> uint16 float2half(T value) {
-  return float2half_impl<R>(
-      value, bool_type < std::numeric_limits<T>::is_iec559 &&
-                 sizeof(typename bits<T>::type) == sizeof(T) > ());
-}
-
-/// Convert integer to half-precision floating point.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \tparam S `true` if value negative, `false` else
-/// \tparam T type to convert (builtin integer type)
-/// \param value non-negative integral value
-/// \return binary representation of half-precision value
-template <std::float_round_style R, bool S, typename T>
-uint16 int2half_impl(T value) {
-#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
-  static_assert(std::is_integral<T>::value,
-                "int to half conversion only supports builtin integer types");
-#endif
-  if (S)
-    value = -value;
-  uint16 bits = S << 15;
-  if (value > 0xFFFF) {
-    if (R == std::round_toward_infinity)
-      bits |= 0x7C00 - S;
-    else if (R == std::round_toward_neg_infinity)
-      bits |= 0x7BFF + S;
-    else
-      bits |= 0x7BFF + (R != std::round_toward_zero);
-  } else if (value) {
-    uint32_t m = value, exp = 24;
-    for (; m < 0x400; m <<= 1, --exp)
-      ;
-    for (; m > 0x7FF; m >>= 1, ++exp)
-      ;
-    bits |= (exp << 10) + m;
-    if (exp > 24) {
-      if (R == std::round_to_nearest)
-        bits += (value >> (exp - 25)) & 1
-#if HALF_ROUND_TIES_TO_EVEN
-                & (((((1 << (exp - 25)) - 1) & value) != 0) | bits)
-#endif
-            ;
-      else if (R == std::round_toward_infinity)
-        bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & !S;
-      else if (R == std::round_toward_neg_infinity)
-        bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & S;
-    }
-  }
-  return bits;
-}
-
-/// Convert integer to half-precision floating point.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \tparam T type to convert (builtin integer type)
-/// \param value integral value
-/// \return binary representation of half-precision value
-template <std::float_round_style R, typename T> uint16 int2half(T value) {
-  return (value < 0) ? int2half_impl<R, true>(value)
-                     : int2half_impl<R, false>(value);
-}
-
-/// Convert half-precision to IEEE single-precision.
-/// Credit for this goes to [Jeroen van der
-/// Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
-/// \param value binary representation of half-precision value
-/// \return single-precision value
-inline float half2float_impl(uint16 value, float, true_type) {
-  typedef bits<float>::type uint32;
-  /*			uint32 bits = static_cast<uint32>(value&0x8000) << 16;
-              int abs = value & 0x7FFF;
-              if(abs)
-              {
-                  bits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
-                  for(; abs<0x400; abs<<=1,bits-=0x800000) ;
-                  bits += static_cast<uint32>(abs) << 13;
-              }
-  */
-  static const uint32 mantissa_table[2048] = {
-      0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000,
-      0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
-      0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
-      0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000,
-      0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000,
-      0x35F00000, 0x35F80000, 0x36000000, 0x36040000, 0x36080000, 0x360C0000,
-      0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000,
-      0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
-      0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000,
-      0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000,
-      0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000,
-      0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000,
-      0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000,
-      0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000,
-      0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000,
-      0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
-      0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000,
-      0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000,
-      0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000,
-      0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000,
-      0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000,
-      0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
-      0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
-      0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
-      0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
-      0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000,
-      0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 0x37200000, 0x37210000,
-      0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
-      0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000,
-      0x372E0000, 0x372F0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
-      0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
-      0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
-      0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
-      0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000,
-      0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000,
-      0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
-      0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000,
-      0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
-      0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
-      0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
-      0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
-      0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000,
-      0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000,
-      0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
-      0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
-      0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
-      0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000,
-      0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,
-      0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
-      0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
-      0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
-      0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000,
-      0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000,
-      0x379F0000, 0x379F8000, 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000,
-      0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000,
-      0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,
-      0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000,
-      0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000,
-      0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000,
-      0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000,
-      0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000,
-      0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000,
-      0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000,
-      0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
-      0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000,
-      0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000,
-      0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000,
-      0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000,
-      0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000,
-      0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000,
-      0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000,
-      0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000,
-      0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000,
-      0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000,
-      0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 0x37E00000, 0x37E08000,
-      0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000,
-      0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000,
-      0x37E70000, 0x37E78000, 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000,
-      0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000,
-      0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,
-      0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000,
-      0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000,
-      0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000,
-      0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000,
-      0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000,
-      0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000,
-      0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000,
-      0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
-      0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000,
-      0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000,
-      0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000,
-      0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000,
-      0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000,
-      0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000,
-      0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000,
-      0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,
-      0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000,
-      0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000,
-      0x38130000, 0x38134000, 0x38138000, 0x3813C000, 0x38140000, 0x38144000,
-      0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000,
-      0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000,
-      0x38178000, 0x3817C000, 0x38180000, 0x38184000, 0x38188000, 0x3818C000,
-      0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000,
-      0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,
-      0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000,
-      0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000,
-      0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000,
-      0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000,
-      0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000,
-      0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000,
-      0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000,
-      0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
-      0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000,
-      0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000,
-      0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000,
-      0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000,
-      0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000,
-      0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000,
-      0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000,
-      0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,
-      0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000,
-      0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000,
-      0x38370000, 0x38374000, 0x38378000, 0x3837C000, 0x38380000, 0x38384000,
-      0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000,
-      0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000,
-      0x383B8000, 0x383BC000, 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000,
-      0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000,
-      0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,
-      0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000,
-      0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000,
-      0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000,
-      0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000,
-      0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000,
-      0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000,
-      0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000,
-      0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
-      0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000,
-      0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000,
-      0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000,
-      0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000,
-      0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000,
-      0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000,
-      0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000,
-      0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,
-      0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000,
-      0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000,
-      0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 0x385C0000, 0x385C4000,
-      0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000,
-      0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000,
-      0x385F8000, 0x385FC000, 0x38600000, 0x38604000, 0x38608000, 0x3860C000,
-      0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000,
-      0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,
-      0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000,
-      0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000,
-      0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000,
-      0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000,
-      0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000,
-      0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000,
-      0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000,
-      0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
-      0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000,
-      0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000,
-      0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000,
-      0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000,
-      0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000,
-      0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000,
-      0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000,
-      0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,
-      0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000,
-      0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000,
-      0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 0x38000000, 0x38002000,
-      0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000,
-      0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000,
-      0x3801C000, 0x3801E000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
-      0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000,
-      0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000,
-      0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000,
-      0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
-      0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000,
-      0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000,
-      0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000,
-      0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
-      0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000,
-      0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,
-      0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000,
-      0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000,
-      0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000,
-      0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000,
-      0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000,
-      0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000,
-      0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000,
-      0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000,
-      0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000,
-      0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
-      0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 0x38120000, 0x38122000,
-      0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000,
-      0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000,
-      0x3813C000, 0x3813E000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
-      0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000,
-      0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000,
-      0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000,
-      0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
-      0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000,
-      0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000,
-      0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000,
-      0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000,
-      0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000,
-      0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,
-      0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000,
-      0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000,
-      0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000,
-      0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000,
-      0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000,
-      0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
-      0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000,
-      0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000,
-      0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000,
-      0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
-      0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 0x38240000, 0x38242000,
-      0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000,
-      0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000,
-      0x3825C000, 0x3825E000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
-      0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000,
-      0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000,
-      0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000,
-      0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
-      0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000,
-      0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000,
-      0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000,
-      0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000,
-      0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000,
-      0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,
-      0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000,
-      0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000,
-      0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000,
-      0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000,
-      0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000,
-      0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
-      0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000,
-      0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000,
-      0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000,
-      0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
-      0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 0x38360000, 0x38362000,
-      0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000,
-      0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000,
-      0x3837C000, 0x3837E000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
-      0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000,
-      0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000,
-      0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000,
-      0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000,
-      0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000,
-      0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000,
-      0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000,
-      0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000,
-      0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000,
-      0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,
-      0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000,
-      0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
-      0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000,
-      0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000,
-      0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000,
-      0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
-      0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000,
-      0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000,
-      0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000,
-      0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
-      0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 0x38480000, 0x38482000,
-      0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000,
-      0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000,
-      0x3849C000, 0x3849E000, 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000,
-      0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000,
-      0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000,
-      0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000,
-      0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000,
-      0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000,
-      0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000,
-      0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000,
-      0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
-      0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000,
-      0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,
-      0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000,
-      0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
-      0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000,
-      0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000,
-      0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000,
-      0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
-      0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000,
-      0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000,
-      0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000,
-      0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
-      0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 0x385A0000, 0x385A2000,
-      0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000,
-      0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000,
-      0x385BC000, 0x385BE000, 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000,
-      0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000,
-      0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000,
-      0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000,
-      0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000,
-      0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000,
-      0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000,
-      0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000,
-      0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
-      0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000,
-      0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,
-      0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000,
-      0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
-      0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000,
-      0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000,
-      0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000,
-      0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
-      0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000,
-      0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000,
-      0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000,
-      0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000,
-      0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 0x386C0000, 0x386C2000,
-      0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000,
-      0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000,
-      0x386DC000, 0x386DE000, 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000,
-      0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000,
-      0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000,
-      0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000,
-      0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
-      0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000,
-      0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000,
-      0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000,
-      0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
-      0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000,
-      0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,
-      0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000,
-      0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
-      0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000,
-      0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000,
-      0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000,
-      0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000,
-      0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000,
-      0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
-      0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000,
-      0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000,
-      0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, 0x387E0000, 0x387E2000,
-      0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000,
-      0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000,
-      0x387FC000, 0x387FE000};
-  static const uint32 exponent_table[64] = {
-      0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
-      0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
-      0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
-      0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000,
-      0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000,
-      0x0F000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,
-      0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
-      0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
-      0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000,
-      0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000,
-      0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000};
-  static const unsigned short offset_table[64] = {
-      0,    1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
-      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
-      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 0,
-      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
-      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
-      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024};
-  uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] +
-                exponent_table[value >> 10];
-  //			return *reinterpret_cast<float*>(&bits);
-  ////violating
-  // strict aliasing!
-  float out;
-  std::memcpy(&out, &bits, sizeof(float));
-  return out;
-}
-
-/// Convert half-precision to IEEE double-precision.
-/// \param value binary representation of half-precision value
-/// \return double-precision value
-inline double half2float_impl(uint16 value, double, true_type) {
-  typedef bits<float>::type uint32;
-  typedef bits<double>::type uint64;
-  uint32 hi = static_cast<uint32>(value & 0x8000) << 16;
-  int abs = value & 0x7FFF;
-  if (abs) {
-    hi |= 0x3F000000 << static_cast<unsigned>(abs >= 0x7C00);
-    for (; abs < 0x400; abs <<= 1, hi -= 0x100000)
-      ;
-    hi += static_cast<uint32>(abs) << 10;
-  }
-  uint64 bits = static_cast<uint64>(hi) << 32;
-  //			return *reinterpret_cast<double*>(&bits);
-  ////violating
-  // strict aliasing!
-  double out;
-  std::memcpy(&out, &bits, sizeof(double));
-  return out;
-}
-
-/// Convert half-precision to non-IEEE floating point.
-/// \tparam T type to convert to (builtin integer type)
-/// \param value binary representation of half-precision value
-/// \return floating point value
-template <typename T> T half2float_impl(uint16 value, T, ...) {
-  T out;
-  int abs = value & 0x7FFF;
-  if (abs > 0x7C00)
-    out = std::numeric_limits<T>::has_quiet_NaN
-              ? std::numeric_limits<T>::quiet_NaN()
-              : T();
-  else if (abs == 0x7C00)
-    out = std::numeric_limits<T>::has_infinity
-              ? std::numeric_limits<T>::infinity()
-              : std::numeric_limits<T>::max();
-  else if (abs > 0x3FF)
-    out = std::ldexp(static_cast<T>((abs & 0x3FF) | 0x400), (abs >> 10) - 25);
-  else
-    out = std::ldexp(static_cast<T>(abs), -24);
-  return (value & 0x8000) ? -out : out;
-}
-
-/// Convert half-precision to floating point.
-/// \tparam T type to convert to (builtin integer type)
-/// \param value binary representation of half-precision value
-/// \return floating point value
-template <typename T> T half2float(uint16 value) {
-  return half2float_impl(value, T(),
-                         bool_type < std::numeric_limits<T>::is_iec559 &&
-                             sizeof(typename bits<T>::type) == sizeof(T) > ());
-}
-
-/// Convert half-precision floating point to integer.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \tparam E `true` for round to even, `false` for round away from zero
-/// \tparam T type to convert to (buitlin integer type with at least 16 bits
-/// precision, excluding any implicit sign
-/// bits) \param value binary representation of half-precision value \return
-/// integral value
-template <std::float_round_style R, bool E, typename T>
-T half2int_impl(uint16 value) {
-#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
-  static_assert(std::is_integral<T>::value,
-                "half to int conversion only supports builtin integer types");
-#endif
-  uint32_t e = value & 0x7FFF;
-  if (e >= 0x7C00)
-    return (value & 0x8000) ? std::numeric_limits<T>::min()
-                            : std::numeric_limits<T>::max();
-  if (e < 0x3800) {
-    if (R == std::round_toward_infinity)
-      return T(~(value >> 15) & (e != 0));
-    else if (R == std::round_toward_neg_infinity)
-      return -T(value > 0x8000);
-    return T();
-  }
-  uint32_t m = (value & 0x3FF) | 0x400;
-  e >>= 10;
-  if (e < 25) {
-    if (R == std::round_to_nearest)
-      m += (1 << (24 - e)) - (~(m >> (25 - e)) & E);
-    else if (R == std::round_toward_infinity)
-      m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U);
-    else if (R == std::round_toward_neg_infinity)
-      m += -(value >> 15) & ((1 << (25 - e)) - 1U);
-    m >>= 25 - e;
-  } else
-    m <<= e - 25;
-  return (value & 0x8000) ? -static_cast<T>(m) : static_cast<T>(m);
-}
-
-/// Convert half-precision floating point to integer.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \tparam T type to convert to (buitlin integer type with at least 16 bits
-/// precision, excluding any implicit sign
-/// bits) \param value binary representation of half-precision value \return
-/// integral value
-template <std::float_round_style R, typename T> T half2int(uint16 value) {
-  return half2int_impl<R, HALF_ROUND_TIES_TO_EVEN, T>(value);
-}
-
-/// Convert half-precision floating point to integer using
-/// round-to-nearest-away-from-zero.
-/// \tparam T type to convert to (buitlin integer type with at least 16 bits
-/// precision, excluding any implicit sign
-/// bits) \param value binary representation of half-precision value \return
-/// integral value
-template <typename T> T half2int_up(uint16 value) {
-  return half2int_impl<std::round_to_nearest, 0, T>(value);
-}
-
-/// Round half-precision number to nearest integer value.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \tparam E `true` for round to even, `false` for round away from zero
-/// \param value binary representation of half-precision value
-/// \return half-precision bits for nearest integral value
-template <std::float_round_style R, bool E>
-uint16 round_half_impl(uint16 value) {
-  uint32_t e = value & 0x7FFF;
-  uint16 result = value;
-  if (e < 0x3C00) {
-    result &= 0x8000;
-    if (R == std::round_to_nearest)
-      result |= 0x3C00U & -(e >= (0x3800 + E));
-    else if (R == std::round_toward_infinity)
-      result |= 0x3C00U & -(~(value >> 15) & (e != 0));
-    else if (R == std::round_toward_neg_infinity)
-      result |= 0x3C00U & -(value > 0x8000);
-  } else if (e < 0x6400) {
-    e = 25 - (e >> 10);
-    uint32_t mask = (1 << e) - 1;
-    if (R == std::round_to_nearest)
-      result += (1 << (e - 1)) - (~(result >> e) & E);
-    else if (R == std::round_toward_infinity)
-      result += mask & ((value >> 15) - 1);
-    else if (R == std::round_toward_neg_infinity)
-      result += mask & -(value >> 15);
-    result &= ~mask;
-  }
-  return result;
-}
-
-/// Round half-precision number to nearest integer value.
-/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
-/// rounding
-/// \param value binary representation of half-precision value
-/// \return half-precision bits for nearest integral value
-template <std::float_round_style R> uint16 round_half(uint16 value) {
-  return round_half_impl<R, HALF_ROUND_TIES_TO_EVEN>(value);
-}
-
-/// Round half-precision number to nearest integer value using
-/// round-to-nearest-away-from-zero.
-/// \param value binary representation of half-precision value
-/// \return half-precision bits for nearest integral value
-inline uint16 round_half_up(uint16 value) {
-  return round_half_impl<std::round_to_nearest, 0>(value);
-}
-/// \}
-
-struct functions;
-template <typename> struct unary_specialized;
-template <typename, typename> struct binary_specialized;
-template <typename, typename, std::float_round_style> struct half_caster;
-} // namespace detail
-
-/// Half-precision floating point type.
-/// This class implements an IEEE-conformant half-precision floating point type
-/// with the usual arithmetic operators and
-/// conversions. It is implicitly convertible to single-precision floating
-/// point, which makes artihmetic expressions and
-/// functions with mixed-type operands to be of the most precise operand type.
-/// Additionally all arithmetic operations
-/// (and many mathematical functions) are carried out in single-precision
-/// internally. All conversions from single- to
-/// half-precision are done using the library's default rounding mode, but
-/// temporary results inside chained arithmetic
-/// expressions are kept in single-precision as long as possible (while of
-/// course still maintaining a strong
-/// half-precision type).
-///
-/// According to the C++98/03 definition, the half type is not a POD type. But
-/// according to C++11's less strict and
-/// extended definitions it is both a standard layout type and a trivially
-/// copyable type (even if not a POD type), which
-/// means it can be standard-conformantly copied using raw binary copies. But in
-/// this context some more words about the
-/// actual size of the type. Although the half is representing an IEEE 16-bit
-/// type, it does not neccessarily have to be
-/// of exactly 16-bits size. But on any reasonable implementation the actual
-/// binary representation of this type will
-/// most probably not ivolve any additional "magic" or padding beyond the simple
-/// binary representation of the underlying
-/// 16-bit IEEE number, even if not strictly guaranteed by the standard. But
-/// even then it only has an actual size of 16
-/// bits if your C++ implementation supports an unsigned integer type of exactly
-/// 16 bits width. But this should be the
-/// case on nearly any reasonable platform.
-///
-/// So if your C++ implementation is not totally exotic or imposes special
-/// alignment requirements, it is a reasonable
-/// assumption that the data of a half is just comprised of the 2 bytes of the
-/// underlying IEEE representation.
-class half {
-  // Friendship gives the helpers below access to the private members of this
-  // class (the bit-pattern constructor, round_style and data_).
-  friend struct detail::functions;
-  friend struct detail::unary_specialized<half>;
-  friend struct detail::binary_specialized<half, half>;
-  template <typename, typename, std::float_round_style>
-  friend struct detail::half_caster;
-  friend class std::numeric_limits<half>;
-#if HALF_ENABLE_CPP11_HASH
-  friend struct std::hash<half>;
-#endif
-#if HALF_ENABLE_CPP11_USER_LITERALS
-  friend half literal::operator"" _h(long double);
-#endif
-
- public:
-  /// Default constructor.
-  /// This initializes the half to 0. Although this does not match the builtin
-  /// types' default-initialization semantics
-  /// and may be less efficient than no initialization, it is needed to provide
-  /// proper value-initialization semantics.
-  HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {}
-
-  /// Conversion constructor from a half expression.
-  /// The expression is cast to float and then rounded to half using
-  /// round_style. Not explicit, so expressions convert implicitly.
-  /// \param rhs half expression to copy from
-  half(detail::expr rhs)
-      : data_(detail::float2half<round_style>(static_cast<float>(rhs))) {}
-
-  /// Conversion constructor.
-  /// Explicit, so floats never convert to half implicitly.
-  /// \param rhs float to convert
-  explicit half(float rhs) : data_(detail::float2half<round_style>(rhs)) {}
-
-  /// Conversion to single-precision.
-  /// \return single precision value representing expression value
-  operator float() const { return detail::half2float<float>(data_); }
-
-  /// Assignment operator.
-  /// Forwards to the float assignment operator after casting \a rhs to float.
-  /// \param rhs half expression to copy from
-  /// \return reference to this half
-  half& operator=(detail::expr rhs) { return *this = static_cast<float>(rhs); }
-
-  /// Arithmetic assignment.
-  /// Forwards to the float overload after casting \a rhs to float.
-  /// \tparam T type of concrete half expression
-  /// \param rhs half expression to add
-  /// \return reference to this half
-  template <typename T>
-  typename detail::enable<half&, T>::type operator+=(T rhs) {
-    return *this += static_cast<float>(rhs);
-  }
-
-  /// Arithmetic assignment.
-  /// Forwards to the float overload after casting \a rhs to float.
-  /// \tparam T type of concrete half expression
-  /// \param rhs half expression to subtract
-  /// \return reference to this half
-  template <typename T>
-  typename detail::enable<half&, T>::type operator-=(T rhs) {
-    return *this -= static_cast<float>(rhs);
-  }
-
-  /// Arithmetic assignment.
-  /// Forwards to the float overload after casting \a rhs to float.
-  /// \tparam T type of concrete half expression
-  /// \param rhs half expression to multiply with
-  /// \return reference to this half
-  template <typename T>
-  typename detail::enable<half&, T>::type operator*=(T rhs) {
-    return *this *= static_cast<float>(rhs);
-  }
-
-  /// Arithmetic assignment.
-  /// Forwards to the float overload after casting \a rhs to float.
-  /// \tparam T type of concrete half expression
-  /// \param rhs half expression to divide by
-  /// \return reference to this half
-  template <typename T>
-  typename detail::enable<half&, T>::type operator/=(T rhs) {
-    return *this /= static_cast<float>(rhs);
-  }
-
-  /// Assignment operator.
-  /// \param rhs single-precision value to copy from
-  /// \return reference to this half
-  half& operator=(float rhs) {
-    data_ = detail::float2half<round_style>(rhs);
-    return *this;
-  }
-
-  /// Arithmetic assignment.
-  /// The sum is formed in single-precision and rounded back to half.
-  /// \param rhs single-precision value to add
-  /// \return reference to this half
-  half& operator+=(float rhs) {
-    data_ =
-        detail::float2half<round_style>(detail::half2float<float>(data_) + rhs);
-    return *this;
-  }
-
-  /// Arithmetic assignment.
-  /// The difference is formed in single-precision and rounded back to half.
-  /// \param rhs single-precision value to subtract
-  /// \return reference to this half
-  half& operator-=(float rhs) {
-    data_ =
-        detail::float2half<round_style>(detail::half2float<float>(data_) - rhs);
-    return *this;
-  }
-
-  /// Arithmetic assignment.
-  /// The product is formed in single-precision and rounded back to half.
-  /// \param rhs single-precision value to multiply with
-  /// \return reference to this half
-  half& operator*=(float rhs) {
-    data_ =
-        detail::float2half<round_style>(detail::half2float<float>(data_) * rhs);
-    return *this;
-  }
-
-  /// Arithmetic assignment.
-  /// The quotient is formed in single-precision and rounded back to half.
-  /// \param rhs single-precision value to divide by
-  /// \return reference to this half
-  half& operator/=(float rhs) {
-    data_ =
-        detail::float2half<round_style>(detail::half2float<float>(data_) / rhs);
-    return *this;
-  }
-
-  /// Prefix increment.
-  /// \return incremented half value
-  half& operator++() { return *this += 1.0f; }
-
-  /// Prefix decrement.
-  /// \return decremented half value
-  half& operator--() { return *this -= 1.0f; }
-
-  /// Postfix increment.
-  /// \return non-incremented half value
-  half operator++(int) {
-    half out(*this);
-    ++*this;
-    return out;
-  }
-
-  /// Postfix decrement.
-  /// \return non-decremented half value
-  half operator--(int) {
-    half out(*this);
-    --*this;
-    return out;
-  }
-
- private:
-  /// Rounding mode to use for all float-to-half conversions of this class,
-  /// taken from the HALF_ROUND_STYLE macro.
-  static const std::float_round_style round_style =
-      (std::float_round_style)(HALF_ROUND_STYLE);
-
-  /// Constructor.
-  /// The detail::binary_t tag selects this overload so that \a bits is stored
-  /// as-is instead of being converted from a numeric value.
-  /// \param bits binary representation to set half to
-  HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT
-      : data_(bits) {}
-
-  /// Internal binary representation
-  detail::uint16 data_;
-};
-
-#if HALF_ENABLE_CPP11_USER_LITERALS
-namespace literal {
-/// User-defined literal suffix `_h` producing a half.
-/// The long double literal is rounded with half's rounding style and the
-/// resulting bit pattern is stored directly. Because of that conversion the
-/// literal cannot be used as a constant expression.
-/// \param value literal value
-/// \return half with given value (if representable)
-inline half operator"" _h(long double value) {
-  const detail::uint16 bits = detail::float2half<half::round_style>(value);
-  return half(detail::binary, bits);
-}
-} // namespace literal
-#endif
-
-namespace detail {
-/// Wrapper implementing unspecialized half-precision functions.
-struct functions {
-  /// Addition implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return Half-precision sum stored in single-precision
-  static expr plus(float x, float y) { return expr(x + y); }
-
-  /// Subtraction implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return Half-precision difference stored in single-precision
-  static expr minus(float x, float y) { return expr(x - y); }
-
-  /// Multiplication implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return Half-precision product stored in single-precision
-  static expr multiplies(float x, float y) { return expr(x * y); }
-
-  /// Division implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return Half-precision quotient stored in single-precision
-  static expr divides(float x, float y) { return expr(x / y); }
-
-  /// Output implementation.
-  /// \param out stream to write to
-  /// \param arg value to write
-  /// \return reference to stream
-  template <typename charT, typename traits>
-  static std::basic_ostream<charT, traits>&
-  write(std::basic_ostream<charT, traits>& out, float arg) {
-    return out << arg;
-  }
-
-  /// Input implementation.
-  /// \a arg is only assigned when extracting a float from the stream succeeds.
-  /// \param in stream to read from
-  /// \param arg half to read into
-  /// \return reference to stream
-  template <typename charT, typename traits>
-  static std::basic_istream<charT, traits>&
-  read(std::basic_istream<charT, traits>& in, half& arg) {
-    float f;
-    if (in >> f)
-      arg = f;
-    return in;
-  }
-
-  /// Modulo implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return Half-precision division remainder stored in single-precision
-  static expr fmod(float x, float y) { return expr(std::fmod(x, y)); }
-
-  /// Remainder implementation.
-  /// Uses std::remainder when HALF_ENABLE_CPP11_CMATH is set, otherwise a
-  /// manual fallback built on std::fmod.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return Half-precision division remainder stored in single-precision
-  static expr remainder(float x, float y) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::remainder(x, y));
-#else
-    if (builtin_isnan(x) || builtin_isnan(y))
-      return expr(std::numeric_limits<float>::quiet_NaN());
-    float ax = std::fabs(x), ay = std::fabs(y);
-    if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
-      return expr(std::numeric_limits<float>::quiet_NaN());
-    if (ay >= 65536.0f)
-      return expr(x);
-    if (ax == ay)
-      return expr(builtin_signbit(x) ? -0.0f : 0.0f);
-    ax = std::fmod(ax, ay + ay);
-    float y2 = 0.5f * ay;
-    if (ax > y2) {
-      ax -= ay;
-      if (ax >= y2)
-        ax -= ay;
-    }
-    return expr(builtin_signbit(x) ? -ax : ax);
-#endif
-  }
-
-  /// Remainder implementation.
-  /// Uses std::remquo when HALF_ENABLE_CPP11_CMATH is set, otherwise a manual
-  /// fallback built on std::fmod. In the fallback \a quo is not written on the
-  /// early NaN returns or when \a x is returned unchanged.
-  /// \param x first operand
-  /// \param y second operand
-  /// \param quo address to store quotient bits at
-  /// \return Half-precision division remainder stored in single-precision
-  static expr remquo(float x, float y, int* quo) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::remquo(x, y, quo));
-#else
-    if (builtin_isnan(x) || builtin_isnan(y))
-      return expr(std::numeric_limits<float>::quiet_NaN());
-    bool sign = builtin_signbit(x),
-         qsign = static_cast<bool>(sign ^ builtin_signbit(y));
-    float ax = std::fabs(x), ay = std::fabs(y);
-    if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
-      return expr(std::numeric_limits<float>::quiet_NaN());
-    if (ay >= 65536.0f)
-      return expr(x);
-    if (ax == ay)
-      return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f);
-    ax = std::fmod(ax, 8.0f * ay);
-    int cquo = 0;
-    if (ax >= 4.0f * ay) {
-      ax -= 4.0f * ay;
-      cquo += 4;
-    }
-    if (ax >= 2.0f * ay) {
-      ax -= 2.0f * ay;
-      cquo += 2;
-    }
-    float y2 = 0.5f * ay;
-    if (ax > y2) {
-      ax -= ay;
-      ++cquo;
-      if (ax >= y2) {
-        ax -= ay;
-        ++cquo;
-      }
-    }
-    return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax);
-#endif
-  }
-
-  /// Positive difference implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return Positive difference stored in single-precision
-  static expr fdim(float x, float y) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::fdim(x, y));
-#else
-    return expr((x <= y) ? 0.0f : (x - y));
-#endif
-  }
-
-  /// Fused multiply-add implementation.
-  /// std::fma is only used when FP_FAST_FMAF is defined; otherwise a plain
-  /// multiply followed by an add is evaluated.
-  /// \param x first operand
-  /// \param y second operand
-  /// \param z third operand
-  /// \return \a x * \a y + \a z stored in single-precision
-  static expr fma(float x, float y, float z) {
-#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF)
-    return expr(std::fma(x, y, z));
-#else
-    return expr(x * y + z);
-#endif
-  }
-
-  /// Get NaN.
-  /// \return Half-precision quiet NaN
-  static half nanh() { return half(binary, 0x7FFF); }
-
-  /// Exponential implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr exp(float arg) { return expr(std::exp(arg)); }
-
-  /// Exponential minus one implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr expm1(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::expm1(arg));
-#else
-    return expr(static_cast<float>(std::exp(static_cast<double>(arg)) - 1.0));
-#endif
-  }
-
-  /// Binary exponential implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr exp2(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::exp2(arg));
-#else
-    return expr(
-        static_cast<float>(std::exp(arg * 0.69314718055994530941723212145818)));
-#endif
-  }
-
-  /// Logarithm implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr log(float arg) { return expr(std::log(arg)); }
-
-  /// Common logarithm implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr log10(float arg) { return expr(std::log10(arg)); }
-
-  /// Logarithm of one plus argument implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr log1p(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::log1p(arg));
-#else
-    return expr(static_cast<float>(std::log(1.0 + arg)));
-#endif
-  }
-
-  /// Binary logarithm implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr log2(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::log2(arg));
-#else
-    return expr(static_cast<float>(std::log(static_cast<double>(arg)) *
-                                   1.4426950408889634073599246810019));
-#endif
-  }
-
-  /// Square root implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr sqrt(float arg) { return expr(std::sqrt(arg)); }
-
-  /// Cubic root implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr cbrt(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::cbrt(arg));
-#else
-    if (builtin_isnan(arg) || builtin_isinf(arg))
-      return expr(arg);
-    return expr(builtin_signbit(arg)
-                    ? -static_cast<float>(
-                          std::pow(-static_cast<double>(arg), 1.0 / 3.0))
-                    : static_cast<float>(
-                          std::pow(static_cast<double>(arg), 1.0 / 3.0)));
-#endif
-  }
-
-  /// Hypotenuse implementation.
-  /// \param x first argument
-  /// \param y second argument
-  /// \return function value stored in single-precision
-  static expr hypot(float x, float y) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::hypot(x, y));
-#else
-    return expr(
-        (builtin_isinf(x) || builtin_isinf(y))
-            ? std::numeric_limits<float>::infinity()
-            : static_cast<float>(std::sqrt(static_cast<double>(x) * x +
-                                           static_cast<double>(y) * y)));
-#endif
-  }
-
-  /// Power implementation.
-  /// \param base value to exponentiate
-  /// \param exp power to exponentiate to
-  /// \return function value stored in single-precision
-  static expr pow(float base, float exp) { return expr(std::pow(base, exp)); }
-
-  /// Sine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr sin(float arg) { return expr(std::sin(arg)); }
-
-  /// Cosine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr cos(float arg) { return expr(std::cos(arg)); }
-
-  /// Tan implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr tan(float arg) { return expr(std::tan(arg)); }
-
-  /// Arc sine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr asin(float arg) { return expr(std::asin(arg)); }
-
-  /// Arc cosine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr acos(float arg) { return expr(std::acos(arg)); }
-
-  /// Arc tangent implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr atan(float arg) { return expr(std::atan(arg)); }
-
-  /// Arc tangent implementation.
-  /// The arguments are forwarded to std::atan2 in the same order.
-  /// \param x first argument
-  /// \param y second argument
-  /// \return function value stored in single-precision
-  static expr atan2(float x, float y) { return expr(std::atan2(x, y)); }
-
-  /// Hyperbolic sine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr sinh(float arg) { return expr(std::sinh(arg)); }
-
-  /// Hyperbolic cosine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr cosh(float arg) { return expr(std::cosh(arg)); }
-
-  /// Hyperbolic tangent implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr tanh(float arg) { return expr(std::tanh(arg)); }
-
-  /// Hyperbolic area sine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr asinh(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::asinh(arg));
-#else
-    return expr(
-        (arg == -std::numeric_limits<float>::infinity())
-            ? arg
-            : static_cast<float>(std::log(arg + std::sqrt(arg * arg + 1.0))));
-#endif
-  }
-
-  /// Hyperbolic area cosine implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr acosh(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::acosh(arg));
-#else
-    return expr((arg < -1.0f) ? std::numeric_limits<float>::quiet_NaN()
-                              : static_cast<float>(std::log(
-                                    arg + std::sqrt(arg * arg - 1.0))));
-#endif
-  }
-
-  /// Hyperbolic area tangent implementation.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr atanh(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::atanh(arg));
-#else
-    return expr(static_cast<float>(0.5 * std::log((1.0 + arg) / (1.0 - arg))));
-#endif
-  }
-
-  /// Error function implementation.
-  /// The fallback branch calls the private double-precision erf overload.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr erf(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::erf(arg));
-#else
-    return expr(static_cast<float>(erf(static_cast<double>(arg))));
-#endif
-  }
-
-  /// Complementary error function implementation.
-  /// The fallback branch computes one minus the private double-precision erf.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr erfc(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::erfc(arg));
-#else
-    return expr(static_cast<float>(1.0 - erf(static_cast<double>(arg))));
-#endif
-  }
-
-  /// Gamma logarithm implementation.
-  /// The fallback branch calls the private double-precision lgamma overload.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr lgamma(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::lgamma(arg));
-#else
-    if (builtin_isinf(arg))
-      return expr(std::numeric_limits<float>::infinity());
-    if (arg < 0.0f) {
-      float i, f = std::modf(-arg, &i);
-      // Negative integers (no fractional part) yield infinity.
-      if (f == 0.0f)
-        return expr(std::numeric_limits<float>::infinity());
-      return expr(static_cast<float>(
-          1.1447298858494001741434273513531 -
-          std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) -
-          lgamma(1.0 - arg)));
-    }
-    return expr(static_cast<float>(lgamma(static_cast<double>(arg))));
-#endif
-  }
-
-  /// Gamma implementation.
-  /// The fallback branch is built on the private double-precision lgamma.
-  /// \param arg function argument
-  /// \return function value stored in single-precision
-  static expr tgamma(float arg) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::tgamma(arg));
-#else
-    if (arg == 0.0f)
-      return builtin_signbit(arg)
-                 ? expr(-std::numeric_limits<float>::infinity())
-                 : expr(std::numeric_limits<float>::infinity());
-    if (arg < 0.0f) {
-      float i, f = std::modf(-arg, &i);
-      // Negative integers (no fractional part) yield NaN.
-      if (f == 0.0f)
-        return expr(std::numeric_limits<float>::quiet_NaN());
-      double value = 3.1415926535897932384626433832795 /
-                     (std::sin(3.1415926535897932384626433832795 * f) *
-                      std::exp(lgamma(1.0 - arg)));
-      return expr(
-          static_cast<float>((std::fmod(i, 2.0f) == 0.0f) ? -value : value));
-    }
-    if (builtin_isinf(arg))
-      return expr(arg);
-    return expr(static_cast<float>(std::exp(lgamma(static_cast<double>(arg)))));
-#endif
-  }
-
-  /// Floor implementation.
-  /// \param arg value to round
-  /// \return rounded value
-  static half floor(half arg) {
-    return half(binary, round_half<std::round_toward_neg_infinity>(arg.data_));
-  }
-
-  /// Ceiling implementation.
-  /// \param arg value to round
-  /// \return rounded value
-  static half ceil(half arg) {
-    return half(binary, round_half<std::round_toward_infinity>(arg.data_));
-  }
-
-  /// Truncation implementation.
-  /// \param arg value to round
-  /// \return rounded value
-  static half trunc(half arg) {
-    return half(binary, round_half<std::round_toward_zero>(arg.data_));
-  }
-
-  /// Nearest integer implementation.
-  /// \param arg value to round
-  /// \return rounded value
-  static half round(half arg) { return half(binary, round_half_up(arg.data_)); }
-
-  /// Nearest integer implementation.
-  /// \param arg value to round
-  /// \return rounded value
-  static long lround(half arg) { return detail::half2int_up<long>(arg.data_); }
-
-  /// Nearest integer implementation using the half's rounding style.
-  /// \param arg value to round
-  /// \return rounded value
-  static half rint(half arg) {
-    return half(binary, round_half<half::round_style>(arg.data_));
-  }
-
-  /// Nearest integer implementation using the half's rounding style.
-  /// \param arg value to round
-  /// \return rounded value
-  static long lrint(half arg) {
-    return detail::half2int<half::round_style, long>(arg.data_);
-  }
-
-#if HALF_ENABLE_CPP11_LONG_LONG
-  /// Nearest integer implementation.
-  /// \param arg value to round
-  /// \return rounded value
-  static long long llround(half arg) {
-    return detail::half2int_up<long long>(arg.data_);
-  }
-
-  /// Nearest integer implementation using the half's rounding style.
-  /// \param arg value to round
-  /// \return rounded value
-  static long long llrint(half arg) {
-    return detail::half2int<half::round_style, long long>(arg.data_);
-  }
-#endif
-
-  /// Decompression implementation.
-  /// \param arg number to decompress
-  /// \param exp address to store exponent at
-  /// \return normalized significand
-  static half frexp(half arg, int* exp) {
-    int m = arg.data_ & 0x7FFF, e = -14;
-    // Infinity, NaN and zero are returned unchanged with an exponent of 0.
-    if (m >= 0x7C00 || !m)
-      return *exp = 0, arg;
-    // Shift the magnitude bits left until bit 10 or above is set.
-    for (; m < 0x400; m <<= 1, --e)
-      ;
-    return *exp = e + (m >> 10),
-           half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF));
-  }
-
-  /// Decompression implementation.
-  /// \param arg number to decompress
-  /// \param iptr address to store integer part at
-  /// \return fractional part
-  static half modf(half arg, half* iptr) {
-    uint32_t e = arg.data_ & 0x7FFF;
-    if (e >= 0x6400)
-      return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00)));
-    // Magnitude below 1.0 (0x3C00): integer part is a zero carrying the sign.
-    if (e < 0x3C00)
-      return iptr->data_ = arg.data_ & 0x8000, arg;
-    e >>= 10;
-    uint32_t mask = (1 << (25 - e)) - 1, m = arg.data_ & mask;
-    iptr->data_ = arg.data_ & ~mask;
-    if (!m)
-      return half(binary, arg.data_ & 0x8000);
-    for (; m < 0x400; m <<= 1, --e)
-      ;
-    return half(binary, static_cast<uint16>((arg.data_ & 0x8000) | (e << 10) |
-                                            (m & 0x3FF)));
-  }
-
-  /// Scaling implementation.
-  /// The handling of results that leave the representable range depends on
-  /// half::round_style.
-  /// \param arg number to scale
-  /// \param exp power of two to scale by
-  /// \return scaled number
-  static half scalbln(half arg, long exp) {
-    uint32_t m = arg.data_ & 0x7FFF;
-    // Infinity, NaN and zero are returned unchanged.
-    if (m >= 0x7C00 || !m)
-      return arg;
-    for (; m < 0x400; m <<= 1, --exp)
-      ;
-    exp += m >> 10;
-    uint16 value = arg.data_ & 0x8000;
-    if (exp > 30) {
-      if (half::round_style == std::round_toward_zero)
-        value |= 0x7BFF;
-      else if (half::round_style == std::round_toward_infinity)
-        value |= 0x7C00 - (value >> 15);
-      else if (half::round_style == std::round_toward_neg_infinity)
-        value |= 0x7BFF + (value >> 15);
-      else
-        value |= 0x7C00;
-    } else if (exp > 0)
-      value |= (exp << 10) | (m & 0x3FF);
-    else if (exp > -11) {
-      m = (m & 0x3FF) | 0x400;
-      if (half::round_style == std::round_to_nearest) {
-        m += 1 << -exp;
-#if HALF_ROUND_TIES_TO_EVEN
-        m -= (m >> (1 - exp)) & 1;
-#endif
-      } else if (half::round_style == std::round_toward_infinity)
-        m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U);
-      else if (half::round_style == std::round_toward_neg_infinity)
-        m += -(value >> 15) & ((1 << (1 - exp)) - 1U);
-      value |= m >> (1 - exp);
-    } else if (half::round_style == std::round_toward_infinity)
-      value -= (value >> 15) - 1;
-    else if (half::round_style == std::round_toward_neg_infinity)
-      value += value >> 15;
-    return half(binary, value);
-  }
-
-  /// Exponent implementation.
-  /// \param arg number to query
-  /// \return floating point exponent; FP_ILOGB0 for zero, FP_ILOGBNAN for NaN
-  /// and INT_MAX for infinity
-  static int ilogb(half arg) {
-    int abs = arg.data_ & 0x7FFF;
-    if (!abs)
-      return FP_ILOGB0;
-    if (abs < 0x7C00) {
-      int exp = (abs >> 10) - 15;
-      if (abs < 0x400)
-        for (; abs < 0x200; abs <<= 1, --exp)
-          ;
-      return exp;
-    }
-    if (abs > 0x7C00)
-      return FP_ILOGBNAN;
-    return INT_MAX;
-  }
-
-  /// Exponent implementation.
-  /// \param arg number to query
-  /// \return floating point exponent as a half; the bit pattern 0xFC00 for
-  /// zero, 0x7C00 for infinity and \a arg itself for NaN
-  static half logb(half arg) {
-    int abs = arg.data_ & 0x7FFF;
-    if (!abs)
-      return half(binary, 0xFC00);
-    if (abs < 0x7C00) {
-      int exp = (abs >> 10) - 15;
-      if (abs < 0x400)
-        for (; abs < 0x200; abs <<= 1, --exp)
-          ;
-      // Encode the integer exponent directly as a half bit pattern.
-      uint16 bits = (exp < 0) << 15;
-      if (exp) {
-        uint32_t m = std::abs(exp) << 6, e = 18;
-        for (; m < 0x400; m <<= 1, --e)
-          ;
-        bits |= (e << 10) + m;
-      }
-      return half(binary, bits);
-    }
-    if (abs > 0x7C00)
-      return arg;
-    return half(binary, 0x7C00);
-  }
-
-  /// Enumeration implementation.
-  /// \param from number to increase/decrease
-  /// \param to direction to enumerate into
-  /// \return next representable number
-  static half nextafter(half from, half to) {
-    uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
-    if (fabs > 0x7C00)
-      return from;
-    if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs))
-      return to;
-    if (!fabs)
-      return half(binary, (to.data_ & 0x8000) + 1);
-    bool lt =
-        ((fabs == from.data_) ? static_cast<int>(fabs)
-                              : -static_cast<int>(fabs)) <
-        ((tabs == to.data_) ? static_cast<int>(tabs) : -static_cast<int>(tabs));
-    // Step the bit pattern by +1 when the sign bit and lt differ, else by -1.
-    return half(binary,
-                from.data_ +
-                    (((from.data_ >> 15) ^ static_cast<unsigned>(lt)) << 1) -
-                    1);
-  }
-
-  /// Enumeration implementation.
-  /// \param from number to increase/decrease
-  /// \param to direction to enumerate into
-  /// \return next representable number
-  static half nexttoward(half from, long double to) {
-    if (isnan(from))
-      return from;
-    long double lfrom = static_cast<long double>(from);
-    if (builtin_isnan(to) || lfrom == to)
-      return half(static_cast<float>(to));
-    if (!(from.data_ & 0x7FFF))
-      return half(binary,
-                  (static_cast<detail::uint16>(builtin_signbit(to)) << 15) + 1);
-    // Step the bit pattern by +1 when the sign bit and (lfrom < to) differ,
-    // else by -1.
-    return half(
-        binary,
-        from.data_ +
-            (((from.data_ >> 15) ^ static_cast<unsigned>(lfrom < to)) << 1) -
-            1);
-  }
-
-  /// Sign implementation
-  /// \param x first operand
-  /// \param y second operand
-  /// \return composed value (bits of \a x with the sign bit of \a y)
-  static half copysign(half x, half y) {
-    return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000));
-  }
-
-  /// Classification implementation.
-  /// \param arg value to classify
-  /// \return FP_ZERO, FP_SUBNORMAL, FP_NORMAL, FP_INFINITE or FP_NAN
-  /// depending on the magnitude bits of \a arg
-  static int fpclassify(half arg) {
-    uint32_t abs = arg.data_ & 0x7FFF;
-    return abs ? ((abs > 0x3FF) ? ((abs >= 0x7C00)
-                                       ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE)
-                                       : FP_NORMAL)
-                                : FP_SUBNORMAL)
-               : FP_ZERO;
-  }
-
-  /// Classification implementation.
-  /// \param arg value to classify
-  /// \retval true if finite number
-  /// \retval false else
-  static bool isfinite(half arg) { return (arg.data_ & 0x7C00) != 0x7C00; }
-
-  /// Classification implementation.
-  /// \param arg value to classify
-  /// \retval true if infinite number
-  /// \retval false else
-  static bool isinf(half arg) { return (arg.data_ & 0x7FFF) == 0x7C00; }
-
-  /// Classification implementation.
-  /// \param arg value to classify
-  /// \retval true if not a number
-  /// \retval false else
-  static bool isnan(half arg) { return (arg.data_ & 0x7FFF) > 0x7C00; }
-
-  /// Classification implementation.
-  /// \param arg value to classify
-  /// \retval true if normal number
-  /// \retval false else
-  static bool isnormal(half arg) {
-    return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00);
-  }
-
-  /// Sign bit implementation.
-  /// \param arg value to check
-  /// \retval true if signed
-  /// \retval false if unsigned
-  static bool signbit(half arg) { return (arg.data_ & 0x8000) != 0; }
-
-  /// Comparison implementation.
-  /// Positive and negative zero compare equal; NaN never compares equal.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if operands equal
-  /// \retval false else
-  static bool isequal(half x, half y) {
-    return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x);
-  }
-
-  /// Comparison implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if operands not equal
-  /// \retval false else
-  static bool isnotequal(half x, half y) {
-    return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x);
-  }
-
-  /// Comparison implementation.
-  /// Both operands are mapped to signed integers (negated magnitude when the
-  /// sign bit is set) and compared; any NaN operand yields false.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if \a x > \a y
-  /// \retval false else
-  static bool isgreater(half x, half y) {
-    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
-    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
-           (((xabs == x.data_) ? xabs : -xabs) >
-            ((yabs == y.data_) ? yabs : -yabs));
-  }
-
-  /// Comparison implementation.
-  /// Both operands are mapped to signed integers (negated magnitude when the
-  /// sign bit is set) and compared; any NaN operand yields false.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if \a x >= \a y
-  /// \retval false else
-  static bool isgreaterequal(half x, half y) {
-    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
-    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
-           (((xabs == x.data_) ? xabs : -xabs) >=
-            ((yabs == y.data_) ? yabs : -yabs));
-  }
-
-  /// Comparison implementation.
-  /// Both operands are mapped to signed integers (negated magnitude when the
-  /// sign bit is set) and compared; any NaN operand yields false.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if \a x < \a y
-  /// \retval false else
-  static bool isless(half x, half y) {
-    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
-    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
-           (((xabs == x.data_) ? xabs : -xabs) <
-            ((yabs == y.data_) ? yabs : -yabs));
-  }
-
-  /// Comparison implementation.
-  /// Both operands are mapped to signed integers (negated magnitude when the
-  /// sign bit is set) and compared; any NaN operand yields false.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if \a x <= \a y
-  /// \retval false else
-  static bool islessequal(half x, half y) {
-    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
-    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
-           (((xabs == x.data_) ? xabs : -xabs) <=
-            ((yabs == y.data_) ? yabs : -yabs));
-  }
-
-  /// Comparison implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if either \a x > \a y or \a x < \a y
-  /// \retval false else
-  static bool islessgreater(half x, half y) {
-    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
-    if (xabs > 0x7C00 || yabs > 0x7C00)
-      return false;
-    int a = (xabs == x.data_) ? xabs : -xabs,
-        b = (yabs == y.data_) ? yabs : -yabs;
-    return a < b || a > b;
-  }
-
-  /// Comparison implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \retval true if operand unordered
-  /// \retval false else
-  static bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
-
- private:
-  /// Double-precision error function approximation used by the erf() and
-  /// erfc() fallbacks above when HALF_ENABLE_CPP11_CMATH is not set.
-  /// \param arg function argument
-  /// \return approximate erf(\a arg); exactly -1 or 1 for infinite arguments
-  static double erf(double arg) {
-    if (builtin_isinf(arg))
-      return (arg < 0.0) ? -1.0 : 1.0;
-    double x2 = arg * arg, ax2 = 0.147 * x2,
-           value = std::sqrt(
-               1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) /
-                              (1.0 + ax2)));
-    return builtin_signbit(arg) ? -value : value;
-  }
-
-  /// Double-precision log-gamma helper used by the lgamma() and tgamma()
-  /// fallbacks above.
-  /// Arguments below 8 are shifted upwards one step at a time while the
-  /// skipped factors are accumulated in v (log(v) is subtracted at the end);
-  /// the shifted argument is then fed into a polynomial in w = 1 / arg^2.
-  /// \param arg function argument
-  /// \return approximate log-gamma of \a arg
-  static double lgamma(double arg) {
-    double v = 1.0;
-    for (; arg < 8.0; ++arg)
-      v *= arg;
-    double w = 1.0 / (arg * arg);
-    return (((((((-0.02955065359477124183006535947712 * w +
-                  0.00641025641025641025641025641026) *
-                     w +
-                 -0.00191752691752691752691752691753) *
-                    w +
-                8.4175084175084175084175084175084e-4) *
-                   w +
-               -5.952380952380952380952380952381e-4) *
-                  w +
-              7.9365079365079365079365079365079e-4) *
-                 w +
-             -0.00277777777777777777777777777778) *
-                w +
-            0.08333333333333333333333333333333) /
-               arg +
-           0.91893853320467274178032973640562 - std::log(v) - arg +
-           (arg - 0.5) * std::log(arg);
-  }
-};
-
-/// Wrapper for unary half-precision functions needing specialization for
-/// individual argument types.
-/// \tparam T argument type
-template <typename T> struct unary_specialized {
-  /// Negation implementation.
-  /// \param arg value to negate
-  /// \return negated value
-  static HALF_CONSTEXPR half negate(half arg) {
-    return half(binary, arg.data_ ^ 0x8000);
-  }
-
-  /// Absolute value implementation.
-  /// \param arg function argument
-  /// \return absolute value
-  static half fabs(half arg) { return half(binary, arg.data_ & 0x7FFF); }
-};
-template <> struct unary_specialized<expr> {
-  static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); }
-  static expr fabs(float arg) { return expr(std::fabs(arg)); }
-};
-
-/// Wrapper for binary half-precision functions needing specialization for
-/// individual argument types.
-/// \tparam T first argument type
-/// \tparam U first argument type
-template <typename T, typename U> struct binary_specialized {
-  /// Minimum implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return minimum value
-  static expr fmin(float x, float y) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::fmin(x, y));
-#else
-    if (builtin_isnan(x))
-      return expr(y);
-    if (builtin_isnan(y))
-      return expr(x);
-    return expr(std::min(x, y));
-#endif
-  }
-
-  /// Maximum implementation.
-  /// \param x first operand
-  /// \param y second operand
-  /// \return maximum value
-  static expr fmax(float x, float y) {
-#if HALF_ENABLE_CPP11_CMATH
-    return expr(std::fmax(x, y));
-#else
-    if (builtin_isnan(x))
-      return expr(y);
-    if (builtin_isnan(y))
-      return expr(x);
-    return expr(std::max(x, y));
-#endif
-  }
-};
-template <> struct binary_specialized<half, half> {
-  static half fmin(half x, half y) {
-    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
-    if (xabs > 0x7C00)
-      return y;
-    if (yabs > 0x7C00)
-      return x;
-    return (((xabs == x.data_) ? xabs : -xabs) >
-            ((yabs == y.data_) ? yabs : -yabs))
-               ? y
-               : x;
-  }
-  static half fmax(half x, half y) {
-    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
-    if (xabs > 0x7C00)
-      return y;
-    if (yabs > 0x7C00)
-      return x;
-    return (((xabs == x.data_) ? xabs : -xabs) <
-            ((yabs == y.data_) ? yabs : -yabs))
-               ? y
-               : x;
-  }
-};
-
-/// Helper class for half casts.
-/// This class template has to be specialized for all valid cast argument to
-/// define an appropriate static `cast` member
-/// function and a corresponding `type` member denoting its return type.
-/// \tparam T destination type
-/// \tparam U source type
-/// \tparam R rounding mode to use
-template <typename T, typename U,
-          std::float_round_style R = (std::float_round_style)(HALF_ROUND_STYLE)>
-struct half_caster {};
-template <typename U, std::float_round_style R> struct half_caster<half, U, R> {
-#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
-  static_assert(std::is_arithmetic<U>::value,
-                "half_cast from non-arithmetic type unsupported");
-#endif
-
-  static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
-
- private:
-  static half cast_impl(U arg, true_type) {
-    return half(binary, float2half<R>(arg));
-  }
-  static half cast_impl(U arg, false_type) {
-    return half(binary, int2half<R>(arg));
-  }
-};
-template <typename T, std::float_round_style R> struct half_caster<T, half, R> {
-#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
-  static_assert(std::is_arithmetic<T>::value,
-                "half_cast to non-arithmetic type unsupported");
-#endif
-
-  static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
-
- private:
-  static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
-  static T cast_impl(half arg, false_type) { return half2int<R, T>(arg.data_); }
-};
-template <typename T, std::float_round_style R> struct half_caster<T, expr, R> {
-#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
-  static_assert(std::is_arithmetic<T>::value,
-                "half_cast to non-arithmetic type unsupported");
-#endif
-
-  static T cast(expr arg) { return cast_impl(arg, is_float<T>()); }
-
- private:
-  static T cast_impl(float arg, true_type) { return static_cast<T>(arg); }
-  static T cast_impl(half arg, false_type) { return half2int<R, T>(arg.data_); }
-};
-template <std::float_round_style R> struct half_caster<half, half, R> {
-  static half cast(half arg) { return arg; }
-};
-template <std::float_round_style R>
-struct half_caster<half, expr, R> : half_caster<half, half, R> {};
-
-/// \name Comparison operators
-/// \{
-
-/// Comparison for equality.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if operands equal
-/// \retval false else
-template <typename T, typename U>
-typename enable<bool, T, U>::type operator==(T x, U y) {
-  return functions::isequal(x, y);
-}
-
-/// Comparison for inequality.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if operands not equal
-/// \retval false else
-template <typename T, typename U>
-typename enable<bool, T, U>::type operator!=(T x, U y) {
-  return functions::isnotequal(x, y);
-}
-
-/// Comparison for less than.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x less than \a y
-/// \retval false else
-template <typename T, typename U>
-typename enable<bool, T, U>::type operator<(T x, U y) {
-  return functions::isless(x, y);
-}
-
-/// Comparison for greater than.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x greater than \a y
-/// \retval false else
-template <typename T, typename U>
-typename enable<bool, T, U>::type operator>(T x, U y) {
-  return functions::isgreater(x, y);
-}
-
-/// Comparison for less equal.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x less equal \a y
-/// \retval false else
-template <typename T, typename U>
-typename enable<bool, T, U>::type operator<=(T x, U y) {
-  return functions::islessequal(x, y);
-}
-
-/// Comparison for greater equal.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x greater equal \a y
-/// \retval false else
-template <typename T, typename U>
-typename enable<bool, T, U>::type operator>=(T x, U y) {
-  return functions::isgreaterequal(x, y);
-}
-
-/// \}
-/// \name Arithmetic operators
-/// \{
-
-/// Add halfs.
-/// \param x left operand
-/// \param y right operand
-/// \return sum of half expressions
-template <typename T, typename U>
-typename enable<expr, T, U>::type operator+(T x, U y) {
-  return functions::plus(x, y);
-}
-
-/// Subtract halfs.
-/// \param x left operand
-/// \param y right operand
-/// \return difference of half expressions
-template <typename T, typename U>
-typename enable<expr, T, U>::type operator-(T x, U y) {
-  return functions::minus(x, y);
-}
-
-/// Multiply halfs.
-/// \param x left operand
-/// \param y right operand
-/// \return product of half expressions
-template <typename T, typename U>
-typename enable<expr, T, U>::type operator*(T x, U y) {
-  return functions::multiplies(x, y);
-}
-
-/// Divide halfs.
-/// \param x left operand
-/// \param y right operand
-/// \return quotient of half expressions
-template <typename T, typename U>
-typename enable<expr, T, U>::type operator/(T x, U y) {
-  return functions::divides(x, y);
-}
-
-/// Identity.
-/// \param arg operand
-/// \return uncahnged operand
-template <typename T>
-HALF_CONSTEXPR typename enable<T, T>::type operator+(T arg) {
-  return arg;
-}
-
-/// Negation.
-/// \param arg operand
-/// \return negated operand
-template <typename T>
-HALF_CONSTEXPR typename enable<T, T>::type operator-(T arg) {
-  return unary_specialized<T>::negate(arg);
-}
-
-/// \}
-/// \name Input and output
-/// \{
-
-/// Output operator.
-/// \param out output stream to write into
-/// \param arg half expression to write
-/// \return reference to output stream
-template <typename T, typename charT, typename traits>
-typename enable<std::basic_ostream<charT, traits>&, T>::type
-operator<<(std::basic_ostream<charT, traits>& out, T arg) {
-  return functions::write(out, arg);
-}
-
-/// Input operator.
-/// \param in input stream to read from
-/// \param arg half to read into
-/// \return reference to input stream
-template <typename charT, typename traits>
-std::basic_istream<charT, traits>&
-operator>>(std::basic_istream<charT, traits>& in, half& arg) {
-  return functions::read(in, arg);
-}
-
-/// \}
-/// \name Basic mathematical operations
-/// \{
-
-/// Absolute value.
-/// \param arg operand
-/// \return absolute value of \a arg
-//		template<typename T> typename enable<T,T>::type abs(T arg) {
-// return unary_specialized<T>::fabs(arg); }
-inline half abs(half arg) { return unary_specialized<half>::fabs(arg); }
-inline expr abs(expr arg) { return unary_specialized<expr>::fabs(arg); }
-
-/// Absolute value.
-/// \param arg operand
-/// \return absolute value of \a arg
-//		template<typename T> typename enable<T,T>::type fabs(T arg) {
-// return unary_specialized<T>::fabs(arg); }
-inline half fabs(half arg) { return unary_specialized<half>::fabs(arg); }
-inline expr fabs(expr arg) { return unary_specialized<expr>::fabs(arg); }
-
-/// Remainder of division.
-/// \param x first operand
-/// \param y second operand
-/// \return remainder of floating point division.
-//		template<typename T,typename U> typename enable<expr,T,U>::type
-// fmod(T x, U y) { return functions::fmod(x, y); }
-inline expr fmod(half x, half y) { return functions::fmod(x, y); }
-inline expr fmod(half x, expr y) { return functions::fmod(x, y); }
-inline expr fmod(expr x, half y) { return functions::fmod(x, y); }
-inline expr fmod(expr x, expr y) { return functions::fmod(x, y); }
-
-/// Remainder of division.
-/// \param x first operand
-/// \param y second operand
-/// \return remainder of floating point division.
-//		template<typename T,typename U> typename enable<expr,T,U>::type
-// remainder(T x, U y) { return
-// functions::remainder(x, y); }
-inline expr remainder(half x, half y) { return functions::remainder(x, y); }
-inline expr remainder(half x, expr y) { return functions::remainder(x, y); }
-inline expr remainder(expr x, half y) { return functions::remainder(x, y); }
-inline expr remainder(expr x, expr y) { return functions::remainder(x, y); }
-
-/// Remainder of division.
-/// \param x first operand
-/// \param y second operand
-/// \param quo address to store some bits of quotient at
-/// \return remainder of floating point division.
-//		template<typename T,typename U> typename enable<expr,T,U>::type
-// remquo(T x, U y, int *quo) { return
-// functions::remquo(x, y, quo); }
-inline expr remquo(half x, half y, int* quo) {
-  return functions::remquo(x, y, quo);
-}
-inline expr remquo(half x, expr y, int* quo) {
-  return functions::remquo(x, y, quo);
-}
-inline expr remquo(expr x, half y, int* quo) {
-  return functions::remquo(x, y, quo);
-}
-inline expr remquo(expr x, expr y, int* quo) {
-  return functions::remquo(x, y, quo);
-}
-
-/// Fused multiply add.
-/// \param x first operand
-/// \param y second operand
-/// \param z third operand
-/// \return ( \a x * \a y ) + \a z rounded as one operation.
-//		template<typename T,typename U,typename V> typename
-// enable<expr,T,U,V>::type fma(T x, U y, V z) { return
-// functions::fma(x, y, z); }
-inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); }
-inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); }
-inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); }
-inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); }
-inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); }
-inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); }
-inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); }
-inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); }
-
-/// Maximum of half expressions.
-/// \param x first operand
-/// \param y second operand
-/// \return maximum of operands
-//		template<typename T,typename U> typename result<T,U>::type
-// fmax(T
-// x, U y) { return
-// binary_specialized<T,U>::fmax(x, y); }
-inline half fmax(half x, half y) {
-  return binary_specialized<half, half>::fmax(x, y);
-}
-inline expr fmax(half x, expr y) {
-  return binary_specialized<half, expr>::fmax(x, y);
-}
-inline expr fmax(expr x, half y) {
-  return binary_specialized<expr, half>::fmax(x, y);
-}
-inline expr fmax(expr x, expr y) {
-  return binary_specialized<expr, expr>::fmax(x, y);
-}
-
-/// Minimum of half expressions.
-/// \param x first operand
-/// \param y second operand
-/// \return minimum of operands
-//		template<typename T,typename U> typename result<T,U>::type
-// fmin(T
-// x, U y) { return
-// binary_specialized<T,U>::fmin(x, y); }
-inline half fmin(half x, half y) {
-  return binary_specialized<half, half>::fmin(x, y);
-}
-inline expr fmin(half x, expr y) {
-  return binary_specialized<half, expr>::fmin(x, y);
-}
-inline expr fmin(expr x, half y) {
-  return binary_specialized<expr, half>::fmin(x, y);
-}
-inline expr fmin(expr x, expr y) {
-  return binary_specialized<expr, expr>::fmin(x, y);
-}
-
-/// Positive difference.
-/// \param x first operand
-/// \param y second operand
-/// \return \a x - \a y or 0 if difference negative
-//		template<typename T,typename U> typename enable<expr,T,U>::type
-// fdim(T x, U y) { return functions::fdim(x, y); }
-inline expr fdim(half x, half y) { return functions::fdim(x, y); }
-inline expr fdim(half x, expr y) { return functions::fdim(x, y); }
-inline expr fdim(expr x, half y) { return functions::fdim(x, y); }
-inline expr fdim(expr x, expr y) { return functions::fdim(x, y); }
-
-/// Get NaN value.
-/// \return quiet NaN
-inline half nanh(const char*) { return functions::nanh(); }
-
-/// \}
-/// \name Exponential functions
-/// \{
-
-/// Exponential function.
-/// \param arg function argument
-/// \return e raised to \a arg
-//		template<typename T> typename enable<expr,T>::type exp(T arg) {
-// return functions::exp(arg); }
-inline expr exp(half arg) { return functions::exp(arg); }
-inline expr exp(expr arg) { return functions::exp(arg); }
-
-/// Exponential minus one.
-/// \param arg function argument
-/// \return e raised to \a arg subtracted by 1
-//		template<typename T> typename enable<expr,T>::type expm1(T arg)
-//{
-// return functions::expm1(arg); }
-inline expr expm1(half arg) { return functions::expm1(arg); }
-inline expr expm1(expr arg) { return functions::expm1(arg); }
-
-/// Binary exponential.
-/// \param arg function argument
-/// \return 2 raised to \a arg
-//		template<typename T> typename enable<expr,T>::type exp2(T arg) {
-// return functions::exp2(arg); }
-inline expr exp2(half arg) { return functions::exp2(arg); }
-inline expr exp2(expr arg) { return functions::exp2(arg); }
-
-/// Natural logorithm.
-/// \param arg function argument
-/// \return logarithm of \a arg to base e
-//		template<typename T> typename enable<expr,T>::type log(T arg) {
-// return functions::log(arg); }
-inline expr log(half arg) { return functions::log(arg); }
-inline expr log(expr arg) { return functions::log(arg); }
-
-/// Common logorithm.
-/// \param arg function argument
-/// \return logarithm of \a arg to base 10
-//		template<typename T> typename enable<expr,T>::type log10(T arg)
-//{
-// return functions::log10(arg); }
-inline expr log10(half arg) { return functions::log10(arg); }
-inline expr log10(expr arg) { return functions::log10(arg); }
-
-/// Natural logorithm.
-/// \param arg function argument
-/// \return logarithm of \a arg plus 1 to base e
-//		template<typename T> typename enable<expr,T>::type log1p(T arg)
-//{
-// return functions::log1p(arg); }
-inline expr log1p(half arg) { return functions::log1p(arg); }
-inline expr log1p(expr arg) { return functions::log1p(arg); }
-
-/// Binary logorithm.
-/// \param arg function argument
-/// \return logarithm of \a arg to base 2
-//		template<typename T> typename enable<expr,T>::type log2(T arg) {
-// return functions::log2(arg); }
-inline expr log2(half arg) { return functions::log2(arg); }
-inline expr log2(expr arg) { return functions::log2(arg); }
-
-/// \}
-/// \name Power functions
-/// \{
-
-/// Square root.
-/// \param arg function argument
-/// \return square root of \a arg
-//		template<typename T> typename enable<expr,T>::type sqrt(T arg) {
-// return functions::sqrt(arg); }
-inline expr sqrt(half arg) { return functions::sqrt(arg); }
-inline expr sqrt(expr arg) { return functions::sqrt(arg); }
-
-/// Cubic root.
-/// \param arg function argument
-/// \return cubic root of \a arg
-//		template<typename T> typename enable<expr,T>::type cbrt(T arg) {
-// return functions::cbrt(arg); }
-inline expr cbrt(half arg) { return functions::cbrt(arg); }
-inline expr cbrt(expr arg) { return functions::cbrt(arg); }
-
-/// Hypotenuse function.
-/// \param x first argument
-/// \param y second argument
-/// \return square root of sum of squares without internal over- or underflows
-//		template<typename T,typename U> typename enable<expr,T,U>::type
-// hypot(T x, U y) { return functions::hypot(x, y);
-//}
-inline expr hypot(half x, half y) { return functions::hypot(x, y); }
-inline expr hypot(half x, expr y) { return functions::hypot(x, y); }
-inline expr hypot(expr x, half y) { return functions::hypot(x, y); }
-inline expr hypot(expr x, expr y) { return functions::hypot(x, y); }
-
-/// Power function.
-/// \param base first argument
-/// \param exp second argument
-/// \return \a base raised to \a exp
-//		template<typename T,typename U> typename enable<expr,T,U>::type
-// pow(T base, U exp) { return functions::pow(base,
-// exp); }
-inline expr pow(half base, half exp) { return functions::pow(base, exp); }
-inline expr pow(half base, expr exp) { return functions::pow(base, exp); }
-inline expr pow(expr base, half exp) { return functions::pow(base, exp); }
-inline expr pow(expr base, expr exp) { return functions::pow(base, exp); }
-
-/// \}
-/// \name Trigonometric functions
-/// \{
-
-/// Sine function.
-/// \param arg function argument
-/// \return sine value of \a arg
-//		template<typename T> typename enable<expr,T>::type sin(T arg) {
-// return functions::sin(arg); }
-inline expr sin(half arg) { return functions::sin(arg); }
-inline expr sin(expr arg) { return functions::sin(arg); }
-
-/// Cosine function.
-/// \param arg function argument
-/// \return cosine value of \a arg
-//		template<typename T> typename enable<expr,T>::type cos(T arg) {
-// return functions::cos(arg); }
-inline expr cos(half arg) { return functions::cos(arg); }
-inline expr cos(expr arg) { return functions::cos(arg); }
-
-/// Tangent function.
-/// \param arg function argument
-/// \return tangent value of \a arg
-//		template<typename T> typename enable<expr,T>::type tan(T arg) {
-// return functions::tan(arg); }
-inline expr tan(half arg) { return functions::tan(arg); }
-inline expr tan(expr arg) { return functions::tan(arg); }
-
-/// Arc sine.
-/// \param arg function argument
-/// \return arc sine value of \a arg
-//		template<typename T> typename enable<expr,T>::type asin(T arg) {
-// return functions::asin(arg); }
-inline expr asin(half arg) { return functions::asin(arg); }
-inline expr asin(expr arg) { return functions::asin(arg); }
-
-/// Arc cosine function.
-/// \param arg function argument
-/// \return arc cosine value of \a arg
-//		template<typename T> typename enable<expr,T>::type acos(T arg) {
-// return functions::acos(arg); }
-inline expr acos(half arg) { return functions::acos(arg); }
-inline expr acos(expr arg) { return functions::acos(arg); }
-
-/// Arc tangent function.
-/// \param arg function argument
-/// \return arc tangent value of \a arg
-//		template<typename T> typename enable<expr,T>::type atan(T arg) {
-// return functions::atan(arg); }
-inline expr atan(half arg) { return functions::atan(arg); }
-inline expr atan(expr arg) { return functions::atan(arg); }
-
-/// Arc tangent function.
-/// \param x first argument
-/// \param y second argument
-/// \return arc tangent value
-//		template<typename T,typename U> typename enable<expr,T,U>::type
-// atan2(T x, U y) { return functions::atan2(x, y);
-//}
-inline expr atan2(half x, half y) { return functions::atan2(x, y); }
-inline expr atan2(half x, expr y) { return functions::atan2(x, y); }
-inline expr atan2(expr x, half y) { return functions::atan2(x, y); }
-inline expr atan2(expr x, expr y) { return functions::atan2(x, y); }
-
-/// \}
-/// \name Hyperbolic functions
-/// \{
-
-/// Hyperbolic sine.
-/// \param arg function argument
-/// \return hyperbolic sine value of \a arg
-//		template<typename T> typename enable<expr,T>::type sinh(T arg) {
-// return functions::sinh(arg); }
-inline expr sinh(half arg) { return functions::sinh(arg); }
-inline expr sinh(expr arg) { return functions::sinh(arg); }
-
-/// Hyperbolic cosine.
-/// \param arg function argument
-/// \return hyperbolic cosine value of \a arg
-//		template<typename T> typename enable<expr,T>::type cosh(T arg) {
-// return functions::cosh(arg); }
-inline expr cosh(half arg) { return functions::cosh(arg); }
-inline expr cosh(expr arg) { return functions::cosh(arg); }
-
-/// Hyperbolic tangent.
-/// \param arg function argument
-/// \return hyperbolic tangent value of \a arg
-//		template<typename T> typename enable<expr,T>::type tanh(T arg) {
-// return functions::tanh(arg); }
-inline expr tanh(half arg) { return functions::tanh(arg); }
-inline expr tanh(expr arg) { return functions::tanh(arg); }
-
-/// Hyperbolic area sine.
-/// \param arg function argument
-/// \return area sine value of \a arg
-//		template<typename T> typename enable<expr,T>::type asinh(T arg)
-//{
-// return functions::asinh(arg); }
-inline expr asinh(half arg) { return functions::asinh(arg); }
-inline expr asinh(expr arg) { return functions::asinh(arg); }
-
-/// Hyperbolic area cosine.
-/// \param arg function argument
-/// \return area cosine value of \a arg
-//		template<typename T> typename enable<expr,T>::type acosh(T arg)
-//{
-// return functions::acosh(arg); }
-inline expr acosh(half arg) { return functions::acosh(arg); }
-inline expr acosh(expr arg) { return functions::acosh(arg); }
-
-/// Hyperbolic area tangent.
-/// \param arg function argument
-/// \return area tangent value of \a arg
-//		template<typename T> typename enable<expr,T>::type atanh(T arg)
-//{
-// return functions::atanh(arg); }
-inline expr atanh(half arg) { return functions::atanh(arg); }
-inline expr atanh(expr arg) { return functions::atanh(arg); }
-
-/// \}
-/// \name Error and gamma functions
-/// \{
-
-/// Error function.
-/// \param arg function argument
-/// \return error function value of \a arg
-//		template<typename T> typename enable<expr,T>::type erf(T arg) {
-// return functions::erf(arg); }
-inline expr erf(half arg) { return functions::erf(arg); }
-inline expr erf(expr arg) { return functions::erf(arg); }
-
-/// Complementary error function.
-/// \param arg function argument
-/// \return 1 minus error function value of \a arg
-//		template<typename T> typename enable<expr,T>::type erfc(T arg) {
-// return functions::erfc(arg); }
-inline expr erfc(half arg) { return functions::erfc(arg); }
-inline expr erfc(expr arg) { return functions::erfc(arg); }
-
-/// Natural logarithm of gamma function.
-/// \param arg function argument
-/// \return natural logarith of gamma function for \a arg
-//		template<typename T> typename enable<expr,T>::type lgamma(T arg)
-//{
-// return functions::lgamma(arg); }
-inline expr lgamma(half arg) { return functions::lgamma(arg); }
-inline expr lgamma(expr arg) { return functions::lgamma(arg); }
-
-/// Gamma function.
-/// \param arg function argument
-/// \return gamma function value of \a arg
-//		template<typename T> typename enable<expr,T>::type tgamma(T arg)
-//{
-// return functions::tgamma(arg); }
-inline expr tgamma(half arg) { return functions::tgamma(arg); }
-inline expr tgamma(expr arg) { return functions::tgamma(arg); }
-
-/// \}
-/// \name Rounding
-/// \{
-
-/// Nearest integer not less than half value.
-/// \param arg half to round
-/// \return nearest integer not less than \a arg
-//		template<typename T> typename enable<half,T>::type ceil(T arg) {
-// return functions::ceil(arg); }
-inline half ceil(half arg) { return functions::ceil(arg); }
-inline half ceil(expr arg) { return functions::ceil(arg); }
-
-/// Nearest integer not greater than half value.
-/// \param arg half to round
-/// \return nearest integer not greater than \a arg
-//		template<typename T> typename enable<half,T>::type floor(T arg)
-//{
-// return functions::floor(arg); }
-inline half floor(half arg) { return functions::floor(arg); }
-inline half floor(expr arg) { return functions::floor(arg); }
-
-/// Nearest integer not greater in magnitude than half value.
-/// \param arg half to round
-/// \return nearest integer not greater in magnitude than \a arg
-//		template<typename T> typename enable<half,T>::type trunc(T arg)
-//{
-// return functions::trunc(arg); }
-inline half trunc(half arg) { return functions::trunc(arg); }
-inline half trunc(expr arg) { return functions::trunc(arg); }
-
-/// Nearest integer.
-/// \param arg half to round
-/// \return nearest integer, rounded away from zero in half-way cases
-//		template<typename T> typename enable<half,T>::type round(T arg)
-//{
-// return functions::round(arg); }
-inline half round(half arg) { return functions::round(arg); }
-inline half round(expr arg) { return functions::round(arg); }
-
-/// Nearest integer.
-/// \param arg half to round
-/// \return nearest integer, rounded away from zero in half-way cases
-//		template<typename T> typename enable<long,T>::type lround(T arg)
-//{
-// return functions::lround(arg); }
-inline long lround(half arg) { return functions::lround(arg); }
-inline long lround(expr arg) { return functions::lround(arg); }
-
-/// Nearest integer using half's internal rounding mode.
-/// \param arg half expression to round
-/// \return nearest integer using default rounding mode
-//		template<typename T> typename enable<half,T>::type nearbyint(T
-// arg) { return functions::nearbyint(arg); }
-inline half nearbyint(half arg) { return functions::rint(arg); }
-inline half nearbyint(expr arg) { return functions::rint(arg); }
-
-/// Nearest integer using half's internal rounding mode.
-/// \param arg half expression to round
-/// \return nearest integer using default rounding mode
-//		template<typename T> typename enable<half,T>::type rint(T arg) {
-// return functions::rint(arg); }
-inline half rint(half arg) { return functions::rint(arg); }
-inline half rint(expr arg) { return functions::rint(arg); }
-
-/// Nearest integer using half's internal rounding mode.
-/// \param arg half expression to round
-/// \return nearest integer using default rounding mode
-//		template<typename T> typename enable<long,T>::type lrint(T arg)
-//{
-// return functions::lrint(arg); }
-inline long lrint(half arg) { return functions::lrint(arg); }
-inline long lrint(expr arg) { return functions::lrint(arg); }
-#if HALF_ENABLE_CPP11_LONG_LONG
-/// Nearest integer.
-/// \param arg half to round
-/// \return nearest integer, rounded away from zero in half-way cases
-//		template<typename T> typename enable<long long,T>::type
-// llround(T
-// arg) { return functions::llround(arg); }
-inline long long llround(half arg) { return functions::llround(arg); }
-inline long long llround(expr arg) { return functions::llround(arg); }
-
-/// Nearest integer using half's internal rounding mode.
-/// \param arg half expression to round
-/// \return nearest integer using default rounding mode
-//		template<typename T> typename enable<long long,T>::type llrint(T
-// arg) { return functions::llrint(arg); }
-inline long long llrint(half arg) { return functions::llrint(arg); }
-inline long long llrint(expr arg) { return functions::llrint(arg); }
-#endif
-
-/// \}
-/// \name Floating point manipulation
-/// \{
-
-/// Decompress floating point number.
-/// \param arg number to decompress
-/// \param exp address to store exponent at
-/// \return significant in range [0.5, 1)
-//		template<typename T> typename enable<half,T>::type frexp(T arg,
-// int *exp) { return functions::frexp(arg, exp); }
-inline half frexp(half arg, int* exp) { return functions::frexp(arg, exp); }
-inline half frexp(expr arg, int* exp) { return functions::frexp(arg, exp); }
-
-/// Multiply by power of two.
-/// \param arg number to modify
-/// \param exp power of two to multiply with
-/// \return \a arg multplied by 2 raised to \a exp
-//		template<typename T> typename enable<half,T>::type ldexp(T arg,
-// int exp) { return functions::scalbln(arg, exp);
-//}
-inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); }
-inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); }
-
-/// Extract integer and fractional parts.
-/// \param arg number to decompress
-/// \param iptr address to store integer part at
-/// \return fractional part
-//		template<typename T> typename enable<half,T>::type modf(T arg,
-// half *iptr) { return functions::modf(arg, iptr);
-//}
-inline half modf(half arg, half* iptr) { return functions::modf(arg, iptr); }
-inline half modf(expr arg, half* iptr) { return functions::modf(arg, iptr); }
-
-/// Multiply by power of two.
-/// \param arg number to modify
-/// \param exp power of two to multiply with
-/// \return \a arg multplied by 2 raised to \a exp
-//		template<typename T> typename enable<half,T>::type scalbn(T arg,
-// int exp) { return functions::scalbln(arg, exp);
-//}
-inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); }
-inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); }
-
-/// Multiply by power of two.
-/// \param arg number to modify
-/// \param exp power of two to multiply with
-/// \return \a arg multplied by 2 raised to \a exp
-//		template<typename T> typename enable<half,T>::type scalbln(T
-// arg,
-// long exp) { return functions::scalbln(arg,
-// exp);
-//}
-inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); }
-inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); }
-
-/// Extract exponent.
-/// \param arg number to query
-/// \return floating point exponent
-/// \retval FP_ILOGB0 for zero
-/// \retval FP_ILOGBNAN for NaN
-/// \retval MAX_INT for infinity
-//		template<typename T> typename enable<int,T>::type ilogb(T arg) {
-// return functions::ilogb(arg); }
-inline int ilogb(half arg) { return functions::ilogb(arg); }
-inline int ilogb(expr arg) { return functions::ilogb(arg); }
-
-/// Extract exponent.
-/// \param arg number to query
-/// \return floating point exponent
-//		template<typename T> typename enable<half,T>::type logb(T arg) {
-// return functions::logb(arg); }
-inline half logb(half arg) { return functions::logb(arg); }
-inline half logb(expr arg) { return functions::logb(arg); }
-
-/// Next representable value.
-/// \param from value to compute next representable value for
-/// \param to direction towards which to compute next value
-/// \return next representable value after \a from in direction towards \a to
-//		template<typename T,typename U> typename enable<half,T,U>::type
-// nextafter(T from, U to) { return
-// functions::nextafter(from, to); }
-inline half nextafter(half from, half to) {
-  return functions::nextafter(from, to);
-}
-inline half nextafter(half from, expr to) {
-  return functions::nextafter(from, to);
-}
-inline half nextafter(expr from, half to) {
-  return functions::nextafter(from, to);
-}
-inline half nextafter(expr from, expr to) {
-  return functions::nextafter(from, to);
-}
-
-/// Next representable value.
-/// \param from value to compute next representable value for
-/// \param to direction towards which to compute next value
-/// \return next representable value after \a from in direction towards \a to
-//		template<typename T> typename enable<half,T>::type nexttoward(T
-// from, long double to) { return
-// functions::nexttoward(from, to); }
-inline half nexttoward(half from, long double to) {
-  return functions::nexttoward(from, to);
-}
-inline half nexttoward(expr from, long double to) {
-  return functions::nexttoward(from, to);
-}
-
-/// Take sign.
-/// \param x value to change sign for
-/// \param y value to take sign from
-/// \return value equal to \a x in magnitude and to \a y in sign
-//		template<typename T,typename U> typename enable<half,T,U>::type
-// copysign(T x, U y) { return
-// functions::copysign(x, y); }
-inline half copysign(half x, half y) { return functions::copysign(x, y); }
-inline half copysign(half x, expr y) { return functions::copysign(x, y); }
-inline half copysign(expr x, half y) { return functions::copysign(x, y); }
-inline half copysign(expr x, expr y) { return functions::copysign(x, y); }
-
-/// \}
-/// \name Floating point classification
-/// \{
-
-/// Classify floating point value.
-/// \param arg number to classify
-/// \retval FP_ZERO for positive and negative zero
-/// \retval FP_SUBNORMAL for subnormal numbers
-/// \retval FP_INFINITY for positive and negative infinity
-/// \retval FP_NAN for NaNs
-/// \retval FP_NORMAL for all other (normal) values
-//		template<typename T> typename enable<int,T>::type fpclassify(T
-// arg) { return functions::fpclassify(arg); }
-inline int fpclassify(half arg) { return functions::fpclassify(arg); }
-inline int fpclassify(expr arg) { return functions::fpclassify(arg); }
-
-/// Check if finite number.
-/// \param arg number to check
-/// \retval true if neither infinity nor NaN
-/// \retval false else
-//		template<typename T> typename enable<bool,T>::type isfinite(T
-// arg)
-//{ return functions::isfinite(arg); }
-inline bool isfinite(half arg) { return functions::isfinite(arg); }
-inline bool isfinite(expr arg) { return functions::isfinite(arg); }
-
-/// Check for infinity.
-/// \param arg number to check
-/// \retval true for positive or negative infinity
-/// \retval false else
-//		template<typename T> typename enable<bool,T>::type isinf(T arg)
-//{
-// return functions::isinf(arg); }
-inline bool isinf(half arg) { return functions::isinf(arg); }
-inline bool isinf(expr arg) { return functions::isinf(arg); }
-
-/// Check for NaN.
-/// \param arg number to check
-/// \retval true for NaNs
-/// \retval false else
-//		template<typename T> typename enable<bool,T>::type isnan(T arg)
-//{
-// return functions::isnan(arg); }
-inline bool isnan(half arg) { return functions::isnan(arg); }
-inline bool isnan(expr arg) { return functions::isnan(arg); }
-
-/// Check if normal number.
-/// \param arg number to check
-/// \retval true if normal number
-/// \retval false if either subnormal, zero, infinity or NaN
-//		template<typename T> typename enable<bool,T>::type isnormal(T
-// arg)
-//{ return functions::isnormal(arg); }
-inline bool isnormal(half arg) { return functions::isnormal(arg); }
-inline bool isnormal(expr arg) { return functions::isnormal(arg); }
-
-/// Check sign.
-/// \param arg number to check
-/// \retval true for negative number
-/// \retval false for positive number
-//		template<typename T> typename enable<bool,T>::type signbit(T
-// arg)
-//{ return functions::signbit(arg); }
-inline bool signbit(half arg) { return functions::signbit(arg); }
-inline bool signbit(expr arg) { return functions::signbit(arg); }
-
-/// \}
-/// \name Comparison
-/// \{
-
-/// Comparison for greater than.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x greater than \a y
-/// \retval false else
-//		template<typename T,typename U> typename enable<bool,T,U>::type
-// isgreater(T x, U y) { return
-// functions::isgreater(x, y); }
-inline bool isgreater(half x, half y) { return functions::isgreater(x, y); }
-inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); }
-inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); }
-inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); }
-
-/// Comparison for greater equal.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x greater equal \a y
-/// \retval false else
-//		template<typename T,typename U> typename enable<bool,T,U>::type
-// isgreaterequal(T x, U y) { return
-// functions::isgreaterequal(x, y); }
-inline bool isgreaterequal(half x, half y) {
-  return functions::isgreaterequal(x, y);
-}
-inline bool isgreaterequal(half x, expr y) {
-  return functions::isgreaterequal(x, y);
-}
-inline bool isgreaterequal(expr x, half y) {
-  return functions::isgreaterequal(x, y);
-}
-inline bool isgreaterequal(expr x, expr y) {
-  return functions::isgreaterequal(x, y);
-}
-
-/// Comparison for less than.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x less than \a y
-/// \retval false else
-//		template<typename T,typename U> typename enable<bool,T,U>::type
-// isless(T x, U y) { return functions::isless(x,
-// y);
-//}
-inline bool isless(half x, half y) { return functions::isless(x, y); }
-inline bool isless(half x, expr y) { return functions::isless(x, y); }
-inline bool isless(expr x, half y) { return functions::isless(x, y); }
-inline bool isless(expr x, expr y) { return functions::isless(x, y); }
-
-/// Comparison for less equal.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if \a x less equal \a y
-/// \retval false else
-//		template<typename T,typename U> typename enable<bool,T,U>::type
-// islessequal(T x, U y) { return
-// functions::islessequal(x, y); }
-inline bool islessequal(half x, half y) { return functions::islessequal(x, y); }
-inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); }
-inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); }
-inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); }
-
-/// Comarison for less or greater.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if either less or greater
-/// \retval false else
-//		template<typename T,typename U> typename enable<bool,T,U>::type
-// islessgreater(T x, U y) { return
-// functions::islessgreater(x, y); }
-inline bool islessgreater(half x, half y) {
-  return functions::islessgreater(x, y);
-}
-inline bool islessgreater(half x, expr y) {
-  return functions::islessgreater(x, y);
-}
-inline bool islessgreater(expr x, half y) {
-  return functions::islessgreater(x, y);
-}
-inline bool islessgreater(expr x, expr y) {
-  return functions::islessgreater(x, y);
-}
-
-/// Check if unordered.
-/// \param x first operand
-/// \param y second operand
-/// \retval true if unordered (one or two NaN operands)
-/// \retval false else
-//		template<typename T,typename U> typename enable<bool,T,U>::type
-// isunordered(T x, U y) { return
-// functions::isunordered(x, y); }
-inline bool isunordered(half x, half y) { return functions::isunordered(x, y); }
-inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); }
-inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); }
-inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); }
-
-/// \name Casting
-/// \{
-
-/// Cast to or from half-precision floating point number.
-/// This casts between [half](\ref half_float::half) and any built-in arithmetic
-/// type. The values are converted
-/// directly using the given rounding mode, without any roundtrip over `float`
-/// that a `static_cast` would otherwise do.
-/// It uses the default rounding mode.
-///
-/// Using this cast with neither of the two types being a [half](\ref
-/// half_float::half) or with any of the two types
-/// not being a built-in arithmetic type (apart from [half](\ref
-/// half_float::half), of course) results in a compiler
-/// error and casting between [half](\ref half_float::half)s is just a no-op.
-/// \tparam T destination type (half or built-in arithmetic type)
-/// \tparam U source type (half or built-in arithmetic type)
-/// \param arg value to cast
-/// \return \a arg converted to destination type
-template <typename T, typename U> T half_cast(U arg) {
-  return half_caster<T, U>::cast(arg);
-}
-
-/// Cast to or from half-precision floating point number.
-/// This casts between [half](\ref half_float::half) and any built-in arithmetic
-/// type. The values are converted
-/// directly using the given rounding mode, without any roundtrip over `float`
-/// that a `static_cast` would otherwise do.
-///
-/// Using this cast with neither of the two types being a [half](\ref
-/// half_float::half) or with any of the two types
-/// not being a built-in arithmetic type (apart from [half](\ref
-/// half_float::half), of course) results in a compiler
-/// error and casting between [half](\ref half_float::half)s is just a no-op.
-/// \tparam T destination type (half or built-in arithmetic type)
-/// \tparam R rounding mode to use.
-/// \tparam U source type (half or built-in arithmetic type)
-/// \param arg value to cast
-/// \return \a arg converted to destination type
-template <typename T, std::float_round_style R, typename U> T half_cast(U arg) {
-  return half_caster<T, U, R>::cast(arg);
-}
-/// \}
-} // namespace detail
-
-using detail::operator==;
-using detail::operator!=;
-using detail::operator<;
-using detail::operator>;
-using detail::operator<=;
-using detail::operator>=;
-using detail::operator+;
-using detail::operator-;
-using detail::operator*;
-using detail::operator/;
-using detail::operator<<;
-using detail::operator>>;
-
-using detail::abs;
-using detail::acos;
-using detail::acosh;
-using detail::asin;
-using detail::asinh;
-using detail::atan;
-using detail::atan2;
-using detail::atanh;
-using detail::cbrt;
-using detail::ceil;
-using detail::cos;
-using detail::cosh;
-using detail::erf;
-using detail::erfc;
-using detail::exp;
-using detail::exp2;
-using detail::expm1;
-using detail::fabs;
-using detail::fdim;
-using detail::floor;
-using detail::fma;
-using detail::fmax;
-using detail::fmin;
-using detail::fmod;
-using detail::hypot;
-using detail::lgamma;
-using detail::log;
-using detail::log10;
-using detail::log1p;
-using detail::log2;
-using detail::lrint;
-using detail::lround;
-using detail::nanh;
-using detail::nearbyint;
-using detail::pow;
-using detail::remainder;
-using detail::remquo;
-using detail::rint;
-using detail::round;
-using detail::sin;
-using detail::sinh;
-using detail::sqrt;
-using detail::tan;
-using detail::tanh;
-using detail::tgamma;
-using detail::trunc;
-#if HALF_ENABLE_CPP11_LONG_LONG
-using detail::llrint;
-using detail::llround;
-#endif
-using detail::copysign;
-using detail::fpclassify;
-using detail::frexp;
-using detail::ilogb;
-using detail::isfinite;
-using detail::isgreater;
-using detail::isgreaterequal;
-using detail::isinf;
-using detail::isless;
-using detail::islessequal;
-using detail::islessgreater;
-using detail::isnan;
-using detail::isnormal;
-using detail::isunordered;
-using detail::ldexp;
-using detail::logb;
-using detail::modf;
-using detail::nextafter;
-using detail::nexttoward;
-using detail::scalbln;
-using detail::scalbn;
-using detail::signbit;
-
-using detail::half_cast;
-} // namespace half_float
-
-/// Extensions to the C++ standard library.
-namespace std {
-/// Numeric limits for half-precision floats.
-/// Because of the underlying single-precision implementation of many
-/// operations, it inherits some properties from
-/// `std::numeric_limits<float>`.
-template <>
-class numeric_limits<half_float::half> : public numeric_limits<float> {
- public:
-  /// Supports signed values.
-  static HALF_CONSTEXPR_CONST bool is_signed = true;
-
-  /// Is not exact.
-  static HALF_CONSTEXPR_CONST bool is_exact = false;
-
-  /// Doesn't provide modulo arithmetic.
-  static HALF_CONSTEXPR_CONST bool is_modulo = false;
-
-  /// IEEE conformant.
-  static HALF_CONSTEXPR_CONST bool is_iec559 = true;
-
-  /// Supports infinity.
-  static HALF_CONSTEXPR_CONST bool has_infinity = true;
-
-  /// Supports quiet NaNs.
-  static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
-
-  /// Supports subnormal values.
-  static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
-
-  /// Rounding mode.
-  /// Due to the mix of internal single-precision computations (using the
-  /// rounding mode of the underlying
-  /// single-precision implementation) with the rounding mode of the
-  /// single-to-half conversions, the actual rounding
-  /// mode might be `std::round_indeterminate` if the default half-precision
-  /// rounding mode doesn't match the
-  /// single-precision rounding mode.
-  static HALF_CONSTEXPR_CONST float_round_style round_style =
-      (std::numeric_limits<float>::round_style == half_float::half::round_style)
-          ? half_float::half::round_style
-          : round_indeterminate;
-
-  /// Significant digits.
-  static HALF_CONSTEXPR_CONST int digits = 11;
-
-  /// Significant decimal digits.
-  static HALF_CONSTEXPR_CONST int digits10 = 3;
-
-  /// Required decimal digits to represent all possible values.
-  static HALF_CONSTEXPR_CONST int max_digits10 = 5;
-
-  /// Number base.
-  static HALF_CONSTEXPR_CONST int radix = 2;
-
-  /// One more than smallest exponent.
-  static HALF_CONSTEXPR_CONST int min_exponent = -13;
-
-  /// Smallest normalized representable power of 10.
-  static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
-
-  /// One more than largest exponent
-  static HALF_CONSTEXPR_CONST int max_exponent = 16;
-
-  /// Largest finitely representable power of 10.
-  static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
-
-  /// Smallest positive normal value.
-  static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0x0400);
-  }
-
-  /// Smallest finite value.
-  static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0xFBFF);
-  }
-
-  /// Largest finite value.
-  static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0x7BFF);
-  }
-
-  /// Difference between one and next representable value.
-  static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0x1400);
-  }
-
-  /// Maximum rounding error.
-  static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary,
-                            (round_style == std::round_to_nearest) ? 0x3800
-                                                                   : 0x3C00);
-  }
-
-  /// Positive infinity.
-  static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0x7C00);
-  }
-
-  /// Quiet NaN.
-  static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0x7FFF);
-  }
-
-  /// Signalling NaN.
-  static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0x7DFF);
-  }
-
-  /// Smallest positive subnormal value.
-  static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW {
-    return half_float::half(half_float::detail::binary, 0x0001);
-  }
-};
-
-#if HALF_ENABLE_CPP11_HASH
-/// Hash function for half-precision floats.
-/// This is only defined if C++11 `std::hash` is supported and enabled.
-template <>
-struct hash<half_float::half> //: unary_function<half_float::half,size_t>
-{
-  /// Type of function argument.
-  typedef half_float::half argument_type;
-
-  /// Function return type.
-  typedef size_t result_type;
-
-  /// Compute hash function.
-  /// \param arg half to hash
-  /// \return hash value
-  result_type operator()(argument_type arg) const {
-    return hash<half_float::detail::uint16>()(static_cast<unsigned>(arg.data_) &
-                                              -(arg.data_ != 0x8000));
-  }
-};
-#endif
-} // namespace std
-
-#undef HALF_CONSTEXPR
-#undef HALF_CONSTEXPR_CONST
-#undef HALF_NOEXCEPT
-#undef HALF_NOTHROW
-#ifdef HALF_POP_WARNINGS
-#pragma warning(pop)
-#undef HALF_POP_WARNINGS
-#endif
-
-#endif
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/logger.cpp b/csrcs/fastdeploy/backends/tensorrt/common/logger.cpp
deleted file mode 100644
index 1e1671558..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/logger.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "logger.h"
-#include "ErrorRecorder.h"
-#include "logging.h"
-
-SampleErrorRecorder gRecorder;
-namespace sample {
-Logger gLogger{Logger::Severity::kINFO};
-LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
-LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
-LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
-LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
-LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
-
-void setReportableSeverity(Logger::Severity severity) {
-  gLogger.setReportableSeverity(severity);
-  gLogVerbose.setReportableSeverity(severity);
-  gLogInfo.setReportableSeverity(severity);
-  gLogWarning.setReportableSeverity(severity);
-  gLogError.setReportableSeverity(severity);
-  gLogFatal.setReportableSeverity(severity);
-}
-} // namespace sample
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/logger.h b/csrcs/fastdeploy/backends/tensorrt/common/logger.h
deleted file mode 100644
index ab642744e..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/logger.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LOGGER_H
-#define LOGGER_H
-
-#include "logging.h"
-
-class SampleErrorRecorder;
-extern SampleErrorRecorder gRecorder;
-namespace sample {
-extern Logger gLogger;
-extern LogStreamConsumer gLogVerbose;
-extern LogStreamConsumer gLogInfo;
-extern LogStreamConsumer gLogWarning;
-extern LogStreamConsumer gLogError;
-extern LogStreamConsumer gLogFatal;
-
-void setReportableSeverity(Logger::Severity severity);
-} // namespace sample
-
-#endif // LOGGER_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/logging.h b/csrcs/fastdeploy/backends/tensorrt/common/logging.h
deleted file mode 100644
index abcb6b406..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/logging.h
+++ /dev/null
@@ -1,573 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TENSORRT_LOGGING_H
-#define TENSORRT_LOGGING_H
-
-#include "NvInferRuntimeCommon.h"
-#include "sampleOptions.h"
-#include <cassert>
-#include <ctime>
-#include <iomanip>
-#include <iostream>
-#include <mutex>
-#include <ostream>
-#include <sstream>
-#include <string>
-
-namespace sample {
-
-using Severity = nvinfer1::ILogger::Severity;
-
-class LogStreamConsumerBuffer : public std::stringbuf {
- public:
-  LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
-                          bool shouldLog)
-      : mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
-
-  LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
-      : mOutput(other.mOutput), mPrefix(other.mPrefix),
-        mShouldLog(other.mShouldLog) {}
-  LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
-  LogStreamConsumerBuffer() = delete;
-  LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
-  LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
-
-  ~LogStreamConsumerBuffer() override {
-    // std::streambuf::pbase() gives a pointer to the beginning of the buffered
-    // part of the output sequence
-    // std::streambuf::pptr() gives a pointer to the current position of the
-    // output sequence
-    // if the pointer to the beginning is not equal to the pointer to the
-    // current position,
-    // call putOutput() to log the output to the stream
-    if (pbase() != pptr()) {
-      putOutput();
-    }
-  }
-
-  //!
-  //! synchronizes the stream buffer and returns 0 on success
-  //! synchronizing the stream buffer consists of inserting the buffer contents
-  //! into the stream,
-  //! resetting the buffer and flushing the stream
-  //!
-  int32_t sync() override {
-    putOutput();
-    return 0;
-  }
-
-  void putOutput() {
-    if (mShouldLog) {
-      // prepend timestamp
-      std::time_t timestamp = std::time(nullptr);
-      tm* tm_local = std::localtime(&timestamp);
-      mOutput << "[";
-      mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
-              << "/";
-      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
-      mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
-              << "-";
-      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
-      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
-      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
-      // std::stringbuf::str() gets the string contents of the buffer
-      // insert the buffer contents pre-appended by the appropriate prefix into
-      // the stream
-      mOutput << mPrefix << str();
-    }
-    // set the buffer to empty
-    str("");
-    // flush the stream
-    mOutput.flush();
-  }
-
-  void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
-
- private:
-  std::ostream& mOutput;
-  std::string mPrefix;
-  bool mShouldLog{};
-}; // class LogStreamConsumerBuffer
-
-//!
-//! \class LogStreamConsumerBase
-//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
-//! std::ostream in LogStreamConsumer
-//!
-class LogStreamConsumerBase {
- public:
-  LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
-                        bool shouldLog)
-      : mBuffer(stream, prefix, shouldLog) {}
-
- protected:
-  std::mutex mLogMutex;
-  LogStreamConsumerBuffer mBuffer;
-}; // class LogStreamConsumerBase
-
-//!
-//! \class LogStreamConsumer
-//! \brief Convenience object used to facilitate use of C++ stream syntax when
-//! logging messages.
-//!  Order of base classes is LogStreamConsumerBase and then std::ostream.
-//!  This is because the LogStreamConsumerBase class is used to initialize the
-//!  LogStreamConsumerBuffer member field
-//!  in LogStreamConsumer and then the address of the buffer is passed to
-//!  std::ostream.
-//!  This is necessary to prevent the address of an uninitialized buffer from
-//!  being passed to std::ostream.
-//!  Please do not change the order of the parent classes.
-//!
-class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
- public:
-  //!
-  //! \brief Creates a LogStreamConsumer which logs messages with level
-  //! severity.
-  //!  Reportable severity determines if the messages are severe enough to be
-  //!  logged.
-  //!
-  LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
-                    nvinfer1::ILogger::Severity severity)
-      : LogStreamConsumerBase(severityOstream(severity),
-                              severityPrefix(severity),
-                              severity <= reportableSeverity),
-        std::ostream(&mBuffer) // links the stream buffer with the stream
-        ,
-        mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
-
-  LogStreamConsumer(LogStreamConsumer&& other) noexcept
-      : LogStreamConsumerBase(severityOstream(other.mSeverity),
-                              severityPrefix(other.mSeverity),
-                              other.mShouldLog),
-        std::ostream(&mBuffer) // links the stream buffer with the stream
-        ,
-        mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
-  LogStreamConsumer(const LogStreamConsumer& other) = delete;
-  LogStreamConsumer() = delete;
-  ~LogStreamConsumer() = default;
-  LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
-  LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
-
-  void setReportableSeverity(Severity reportableSeverity) {
-    mShouldLog = mSeverity <= reportableSeverity;
-    mBuffer.setShouldLog(mShouldLog);
-  }
-
-  std::mutex& getMutex() { return mLogMutex; }
-
-  bool getShouldLog() const { return mShouldLog; }
-
- private:
-  static std::ostream& severityOstream(Severity severity) {
-    return severity >= Severity::kINFO ? std::cout : std::cerr;
-  }
-
-  static std::string severityPrefix(Severity severity) {
-    switch (severity) {
-    case Severity::kINTERNAL_ERROR:
-      return "[F] ";
-    case Severity::kERROR:
-      return "[E] ";
-    case Severity::kWARNING:
-      return "[W] ";
-    case Severity::kINFO:
-      return "[I] ";
-    case Severity::kVERBOSE:
-      return "[V] ";
-    default:
-      assert(0);
-      return "";
-    }
-  }
-
-  bool mShouldLog;
-  Severity mSeverity;
-}; // class LogStreamConsumer
-
-template <typename T>
-LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
-  if (logger.getShouldLog()) {
-    std::lock_guard<std::mutex> guard(logger.getMutex());
-    auto& os = static_cast<std::ostream&>(logger);
-    os << obj;
-  }
-  return logger;
-}
-
-//!
-//! Special handling std::endl
-//!
-inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
-                                     std::ostream& (*f)(std::ostream&)) {
-  if (logger.getShouldLog()) {
-    std::lock_guard<std::mutex> guard(logger.getMutex());
-    auto& os = static_cast<std::ostream&>(logger);
-    os << f;
-  }
-  return logger;
-}
-
-inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
-                                     const nvinfer1::Dims& dims) {
-  if (logger.getShouldLog()) {
-    std::lock_guard<std::mutex> guard(logger.getMutex());
-    auto& os = static_cast<std::ostream&>(logger);
-    for (int32_t i = 0; i < dims.nbDims; ++i) {
-      os << (i ? "x" : "") << dims.d[i];
-    }
-  }
-  return logger;
-}
-
-//!
-//! \class Logger
-//!
-//! \brief Class which manages logging of TensorRT tools and samples
-//!
-//! \details This class provides a common interface for TensorRT tools and
-//! samples to log information to the console,
-//! and supports logging two types of messages:
-//!
-//! - Debugging messages with an associated severity (info, warning, error, or
-//! internal error/fatal)
-//! - Test pass/fail messages
-//!
-//! The advantage of having all samples use this class for logging as opposed to
-//! emitting directly to stdout/stderr is
-//! that the logic for controlling the verbosity and formatting of sample output
-//! is centralized in one location.
-//!
-//! In the future, this class could be extended to support dumping test results
-//! to a file in some standard format
-//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
-//! duration of a test run).
-//!
-//! TODO: For backwards compatibility with existing samples, this class inherits
-//! directly from the nvinfer1::ILogger
-//! interface, which is problematic since there isn't a clean separation between
-//! messages coming from the TensorRT
-//! library and messages coming from the sample.
-//!
-//! In the future (once all samples are updated to use Logger::getTRTLogger() to
-//! access the ILogger) we can refactor the
-//! class to eliminate the inheritance and instead make the nvinfer1::ILogger
-//! implementation a member of the Logger
-//! object.
-//!
-class Logger : public nvinfer1::ILogger {
- public:
-  explicit Logger(Severity severity = Severity::kWARNING)
-      : mReportableSeverity(severity) {}
-
-  //!
-  //! \enum TestResult
-  //! \brief Represents the state of a given test
-  //!
-  enum class TestResult {
-    kRUNNING, //!< The test is running
-    kPASSED,  //!< The test passed
-    kFAILED,  //!< The test failed
-    kWAIVED   //!< The test was waived
-  };
-
-  //!
-  //! \brief Forward-compatible method for retrieving the nvinfer::ILogger
-  //! associated with this Logger
-  //! \return The nvinfer1::ILogger associated with this Logger
-  //!
-  //! TODO Once all samples are updated to use this method to register the
-  //! logger with TensorRT,
-  //! we can eliminate the inheritance of Logger from ILogger
-  //!
-  nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }
-
-  //!
-  //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
-  //!
-  //! Note samples should not be calling this function directly; it will
-  //! eventually go away once we eliminate the
-  //! inheritance from nvinfer1::ILogger
-  //!
-  void log(Severity severity, const char* msg) noexcept override {
-    LogStreamConsumer(mReportableSeverity, severity)
-        << "[TRT] " << std::string(msg) << std::endl;
-  }
-
-  //!
-  //! \brief Method for controlling the verbosity of logging output
-  //!
-  //! \param severity The logger will only emit messages that have severity of
-  //! this level or higher.
-  //!
-  void setReportableSeverity(Severity severity) noexcept {
-    mReportableSeverity = severity;
-  }
-
-  //!
-  //! \brief Opaque handle that holds logging information for a particular test
-  //!
-  //! This object is an opaque handle to information used by the Logger to print
-  //! test results.
-  //! The sample must call Logger::defineTest() in order to obtain a TestAtom
-  //! that can be used
-  //! with Logger::reportTest{Start,End}().
-  //!
-  class TestAtom {
-   public:
-    TestAtom(TestAtom&&) = default;
-
-   private:
-    friend class Logger;
-
-    TestAtom(bool started, const std::string& name, const std::string& cmdline)
-        : mStarted(started), mName(name), mCmdline(cmdline) {}
-
-    bool mStarted;
-    std::string mName;
-    std::string mCmdline;
-  };
-
-  //!
-  //! \brief Define a test for logging
-  //!
-  //! \param[in] name The name of the test.  This should be a string starting
-  //! with
-  //!                  "TensorRT" and containing dot-separated strings
-  //!                  containing
-  //!                  the characters [A-Za-z0-9_].
-  //!                  For example, "TensorRT.sample_googlenet"
-  //! \param[in] cmdline The command line used to reproduce the test
-  //
-  //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
-  //!
-  static TestAtom defineTest(const std::string& name,
-                             const std::string& cmdline) {
-    return TestAtom(false, name, cmdline);
-  }
-
-  //!
-  //! \brief A convenience overloaded version of defineTest() that accepts an
-  //! array of command-line arguments
-  //!        as input
-  //!
-  //! \param[in] name The name of the test
-  //! \param[in] argc The number of command-line arguments
-  //! \param[in] argv The array of command-line arguments (given as C strings)
-  //!
-  //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
-  //!
-  static TestAtom defineTest(const std::string& name, int32_t argc,
-                             char const* const* argv) {
-    // Append TensorRT version as info
-    const std::string vname =
-        name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
-    auto cmdline = genCmdlineString(argc, argv);
-    return defineTest(vname, cmdline);
-  }
-
-  //!
-  //! \brief Report that a test has started.
-  //!
-  //! \pre reportTestStart() has not been called yet for the given testAtom
-  //!
-  //! \param[in] testAtom The handle to the test that has started
-  //!
-  static void reportTestStart(TestAtom& testAtom) {
-    reportTestResult(testAtom, TestResult::kRUNNING);
-    assert(!testAtom.mStarted);
-    testAtom.mStarted = true;
-  }
-
-  //!
-  //! \brief Report that a test has ended.
-  //!
-  //! \pre reportTestStart() has been called for the given testAtom
-  //!
-  //! \param[in] testAtom The handle to the test that has ended
-  //! \param[in] result The result of the test. Should be one of
-  //! TestResult::kPASSED,
-  //!                   TestResult::kFAILED, TestResult::kWAIVED
-  //!
-  static void reportTestEnd(TestAtom const& testAtom, TestResult result) {
-    assert(result != TestResult::kRUNNING);
-    assert(testAtom.mStarted);
-    reportTestResult(testAtom, result);
-  }
-
-  static int32_t reportPass(TestAtom const& testAtom) {
-    reportTestEnd(testAtom, TestResult::kPASSED);
-    return EXIT_SUCCESS;
-  }
-
-  static int32_t reportFail(TestAtom const& testAtom) {
-    reportTestEnd(testAtom, TestResult::kFAILED);
-    return EXIT_FAILURE;
-  }
-
-  static int32_t reportWaive(TestAtom const& testAtom) {
-    reportTestEnd(testAtom, TestResult::kWAIVED);
-    return EXIT_SUCCESS;
-  }
-
-  static int32_t reportTest(TestAtom const& testAtom, bool pass) {
-    return pass ? reportPass(testAtom) : reportFail(testAtom);
-  }
-
-  Severity getReportableSeverity() const { return mReportableSeverity; }
-
- private:
-  //!
-  //! \brief returns an appropriate string for prefixing a log message with the
-  //! given severity
-  //!
-  static const char* severityPrefix(Severity severity) {
-    switch (severity) {
-    case Severity::kINTERNAL_ERROR:
-      return "[F] ";
-    case Severity::kERROR:
-      return "[E] ";
-    case Severity::kWARNING:
-      return "[W] ";
-    case Severity::kINFO:
-      return "[I] ";
-    case Severity::kVERBOSE:
-      return "[V] ";
-    default:
-      assert(0);
-      return "";
-    }
-  }
-
-  //!
-  //! \brief returns an appropriate string for prefixing a test result message
-  //! with the given result
-  //!
-  static const char* testResultString(TestResult result) {
-    switch (result) {
-    case TestResult::kRUNNING:
-      return "RUNNING";
-    case TestResult::kPASSED:
-      return "PASSED";
-    case TestResult::kFAILED:
-      return "FAILED";
-    case TestResult::kWAIVED:
-      return "WAIVED";
-    default:
-      assert(0);
-      return "";
-    }
-  }
-
-  //!
-  //! \brief returns an appropriate output stream (cout or cerr) to use with the
-  //! given severity
-  //!
-  static std::ostream& severityOstream(Severity severity) {
-    return severity >= Severity::kINFO ? std::cout : std::cerr;
-  }
-
-  //!
-  //! \brief method that implements logging test results
-  //!
-  static void reportTestResult(TestAtom const& testAtom, TestResult result) {
-    severityOstream(Severity::kINFO)
-        << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
-        << testAtom.mCmdline << std::endl;
-  }
-
-  //!
-  //! \brief generate a command line string from the given (argc, argv) values
-  //!
-  static std::string genCmdlineString(int32_t argc, char const* const* argv) {
-    std::stringstream ss;
-    for (int32_t i = 0; i < argc; i++) {
-      if (i > 0) {
-        ss << " ";
-      }
-      ss << argv[i];
-    }
-    return ss.str();
-  }
-
-  Severity mReportableSeverity;
-}; // class Logger
-
-namespace {
-//!
-//! \brief produces a LogStreamConsumer object that can be used to log messages
-//! of severity kVERBOSE
-//!
-//! Example usage:
-//!
-//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
-//!
-inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
-  return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
-}
-
-//!
-//! \brief produces a LogStreamConsumer object that can be used to log messages
-//! of severity kINFO
-//!
-//! Example usage:
-//!
-//!     LOG_INFO(logger) << "hello world" << std::endl;
-//!
-inline LogStreamConsumer LOG_INFO(const Logger& logger) {
-  return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
-}
-
-//!
-//! \brief produces a LogStreamConsumer object that can be used to log messages
-//! of severity kWARNING
-//!
-//! Example usage:
-//!
-//!     LOG_WARN(logger) << "hello world" << std::endl;
-//!
-inline LogStreamConsumer LOG_WARN(const Logger& logger) {
-  return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
-}
-
-//!
-//! \brief produces a LogStreamConsumer object that can be used to log messages
-//! of severity kERROR
-//!
-//! Example usage:
-//!
-//!     LOG_ERROR(logger) << "hello world" << std::endl;
-//!
-inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
-  return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
-}
-
-//!
-//! \brief produces a LogStreamConsumer object that can be used to log messages
-//! of severity kINTERNAL_ERROR
-//!        ("fatal" severity)
-//!
-//! Example usage:
-//!
-//!     LOG_FATAL(logger) << "hello world" << std::endl;
-//!
-inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
-  return LogStreamConsumer(logger.getReportableSeverity(),
-                           Severity::kINTERNAL_ERROR);
-}
-} // anonymous namespace
-} // namespace sample
-#endif // TENSORRT_LOGGING_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h b/csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h
deleted file mode 100644
index 8569ca01c..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PARSER_ONNX_CONFIG_H
-#define PARSER_ONNX_CONFIG_H
-
-#include <cstring>
-#include <iostream>
-#include <string>
-
-#include "NvInfer.h"
-#include "NvOnnxConfig.h"
-#include "NvOnnxParser.h"
-
-#define ONNX_DEBUG 1
-
-/**
- * \class ParserOnnxConfig
- * \brief Configuration Manager Class Concrete Implementation
- *
- * \note:
- *
- */
-
-using namespace std;
-
-class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
- protected:
-  string mModelFilename{};
-  string mTextFilename{};
-  string mFullTextFilename{};
-  nvinfer1::DataType mModelDtype;
-  nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
-  bool mPrintLayercInfo;
-
- public:
-  ParserOnnxConfig()
-      : mModelDtype(nvinfer1::DataType::kFLOAT),
-        mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
-        mPrintLayercInfo(false) {
-#ifdef ONNX_DEBUG
-    if (isDebug()) {
-      std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
-    }
-#endif
-  }
-
- protected:
-  ~ParserOnnxConfig() {
-#ifdef ONNX_DEBUG
-    if (isDebug()) {
-      std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
-    }
-#endif
-  }
-
- public:
-  virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
-    mModelDtype = modelDtype;
-  }
-
-  virtual nvinfer1::DataType getModelDtype() const noexcept {
-    return mModelDtype;
-  }
-
-  virtual const char* getModelFileName() const noexcept {
-    return mModelFilename.c_str();
-  }
-  virtual void setModelFileName(const char* onnxFilename) noexcept {
-    mModelFilename = string(onnxFilename);
-  }
-  virtual nvonnxparser::IOnnxConfig::Verbosity
-  getVerbosityLevel() const noexcept {
-    return mVerbosity;
-  }
-  virtual void addVerbosity() noexcept { ++mVerbosity; }
-  virtual void reduceVerbosity() noexcept { --mVerbosity; }
-  virtual void
-  setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
-    mVerbosity = verbosity;
-  }
-
-  virtual const char* getTextFileName() const noexcept {
-    return mTextFilename.c_str();
-  }
-  virtual void setTextFileName(const char* textFilename) noexcept {
-    mTextFilename = string(textFilename);
-  }
-  virtual const char* getFullTextFileName() const noexcept {
-    return mFullTextFilename.c_str();
-  }
-  virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
-    mFullTextFilename = string(fullTextFilename);
-  }
-  virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
-  virtual void setPrintLayerInfo(bool src) noexcept {
-    mPrintLayercInfo = src;
-  } //!< get the boolean variable corresponding to the Layer Info, see
-    //! getPrintLayerInfo()
-
-  virtual bool isDebug() const noexcept {
-#if ONNX_DEBUG
-    return (std::getenv("ONNX_DEBUG") ? true : false);
-#else
-    return false;
-#endif
-  }
-
-  virtual void destroy() noexcept { delete this; }
-
-}; // class ParserOnnxConfig
-
-#endif
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h b/csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h
deleted file mode 100644
index 1aa92ad22..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TENSORRT_SAFE_COMMON_H
-#define TENSORRT_SAFE_COMMON_H
-
-#include "NvInferRuntimeCommon.h"
-#include <cstdlib>
-#include <iostream>
-#include <memory>
-#include <stdexcept>
-#include <string>
-
-#define CHECK(status)                                                          \
-  do {                                                                         \
-    auto ret = (status);                                                       \
-    if (ret != 0) {                                                            \
-      std::cerr << "Cuda failure: " << ret << std::endl;                       \
-      abort();                                                                 \
-    }                                                                          \
-  } while (0)
-
-namespace samplesCommon {
-template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
-  if (!obj) {
-    throw std::runtime_error("Failed to create object");
-  }
-  return std::shared_ptr<T>(obj);
-}
-
-inline uint32_t elementSize(nvinfer1::DataType t) {
-  switch (t) {
-  case nvinfer1::DataType::kINT32:
-  case nvinfer1::DataType::kFLOAT:
-    return 4;
-  case nvinfer1::DataType::kHALF:
-    return 2;
-  case nvinfer1::DataType::kINT8:
-    return 1;
-  case nvinfer1::DataType::kBOOL:
-    return 1;
-  }
-  return 0;
-}
-
-template <typename A, typename B> inline A divUp(A x, B n) {
-  return (x + n - 1) / n;
-}
-
-} // namespace samplesCommon
-
-#endif // TENSORRT_SAFE_COMMON_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h
deleted file mode 100644
index a097f4dbe..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SampleConfig_H
-#define SampleConfig_H
-
-#include <cstring>
-#include <iostream>
-#include <string>
-
-#include "NvInfer.h"
-#include "NvOnnxConfig.h"
-class SampleConfig : public nvonnxparser::IOnnxConfig {
- public:
-  enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
-
- private:
-  std::string mModelFilename;
-  std::string mEngineFilename;
-  std::string mTextFilename;
-  std::string mFullTextFilename;
-  std::string mImageFilename;
-  std::string mReferenceFilename;
-  std::string mOutputFilename;
-  std::string mCalibrationFilename;
-  std::string mTimingCacheFilename;
-  int64_t mLabel{-1};
-  int64_t mMaxBatchSize{32};
-  int64_t mCalibBatchSize{0};
-  int64_t mMaxNCalibBatch{0};
-  int64_t mFirstCalibBatch{0};
-  int64_t mUseDLACore{-1};
-  nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
-  bool mTF32{true};
-  Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
-  bool mPrintLayercInfo{false};
-  bool mDebugBuilder{false};
-  InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
-  uint64_t mTopK{0};
-  float mFailurePercentage{-1.0f};
-  float mTolerance{0.0f};
-  float mAbsTolerance{1e-5f};
-
- public:
-  SampleConfig() {
-#ifdef ONNX_DEBUG
-    if (isDebug()) {
-      std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
-    }
-#endif
-  }
-
- protected:
-  ~SampleConfig() {
-#ifdef ONNX_DEBUG
-    if (isDebug()) {
-      std::cout << "SampleConfig::dtor(): " << this << std::endl;
-    }
-#endif
-  }
-
- public:
-  void setModelDtype(const nvinfer1::DataType mdt) noexcept {
-    mModelDtype = mdt;
-  }
-
-  nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
-
-  bool getTF32() const noexcept { return mTF32; }
-
-  void setTF32(bool enabled) noexcept { mTF32 = enabled; }
-
-  const char* getModelFileName() const noexcept {
-    return mModelFilename.c_str();
-  }
-
-  void setModelFileName(const char* onnxFilename) noexcept {
-    mModelFilename = std::string(onnxFilename);
-  }
-  Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
-  void addVerbosity() noexcept { ++mVerbosity; }
-  void reduceVerbosity() noexcept { --mVerbosity; }
-  virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
-  const char* getEngineFileName() const noexcept {
-    return mEngineFilename.c_str();
-  }
-  void setEngineFileName(const char* engineFilename) noexcept {
-    mEngineFilename = std::string(engineFilename);
-  }
-  const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
-  void setTextFileName(const char* textFilename) noexcept {
-    mTextFilename = std::string(textFilename);
-  }
-  const char* getFullTextFileName() const noexcept {
-    return mFullTextFilename.c_str();
-  }
-  void setFullTextFileName(const char* fullTextFilename) noexcept {
-    mFullTextFilename = std::string(fullTextFilename);
-  }
-  void setLabel(int64_t label) noexcept { mLabel = label; } //!<  set the Label
-
-  int64_t getLabel() const noexcept { return mLabel; } //!<  get the Label
-
-  bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
-
-  void setPrintLayerInfo(bool b) noexcept {
-    mPrintLayercInfo = b;
-  } //!< get the boolean variable corresponding to the Layer Info, see
-    //! getPrintLayerInfo()
-
-  void setMaxBatchSize(int64_t maxBatchSize) noexcept {
-    mMaxBatchSize = maxBatchSize;
-  } //!<  set the Max Batch Size
-  int64_t getMaxBatchSize() const noexcept {
-    return mMaxBatchSize;
-  } //!<  get the Max Batch Size
-
-  void setCalibBatchSize(int64_t CalibBatchSize) noexcept {
-    mCalibBatchSize = CalibBatchSize;
-  } //!<  set the calibration batch size
-  int64_t getCalibBatchSize() const noexcept {
-    return mCalibBatchSize;
-  } //!<  get calibration batch size
-
-  void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept {
-    mMaxNCalibBatch = MaxNCalibBatch;
-  } //!<  set Max Number of Calibration Batches
-  int64_t getMaxNCalibBatch() const noexcept {
-    return mMaxNCalibBatch;
-  } //!<  get the Max Number of Calibration Batches
-
-  void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept {
-    mFirstCalibBatch = FirstCalibBatch;
-  } //!<  set the first calibration batch
-  int64_t getFirstCalibBatch() const noexcept {
-    return mFirstCalibBatch;
-  } //!<  get the first calibration batch
-
-  void setUseDLACore(int64_t UseDLACore) noexcept {
-    mUseDLACore = UseDLACore;
-  } //!<  set the DLA core to use
-  int64_t getUseDLACore() const noexcept {
-    return mUseDLACore;
-  } //!<  get the DLA core to use
-
-  void setDebugBuilder() noexcept {
-    mDebugBuilder = true;
-  } //!<  enable the Debug info, while building the engine.
-  bool getDebugBuilder() const noexcept {
-    return mDebugBuilder;
-  } //!<  get the boolean variable, corresponding to the debug builder
-
-  const char*
-  getImageFileName() const noexcept //!<  set Image file name (PPM or ASCII)
-  {
-    return mImageFilename.c_str();
-  }
-  void setImageFileName(
-      const char* imageFilename) noexcept //!< get the Image file name
-  {
-    mImageFilename = std::string(imageFilename);
-  }
-  const char* getReferenceFileName() const noexcept {
-    return mReferenceFilename.c_str();
-  }
-  void setReferenceFileName(
-      const char* referenceFilename) noexcept //!<  set reference file name
-  {
-    mReferenceFilename = std::string(referenceFilename);
-  }
-
-  void setInputDataFormat(InputDataFormat idt) noexcept {
-    mInputDataFormat = idt;
-  } //!<  specifies expected data format of the image file (PPM or ASCII)
-  InputDataFormat getInputDataFormat() const noexcept {
-    return mInputDataFormat;
-  } //!<  returns the expected data format of the image file.
-
-  const char* getOutputFileName()
-      const noexcept //!<  specifies the file to save the results
-  {
-    return mOutputFilename.c_str();
-  }
-  void setOutputFileName(
-      const char* outputFilename) noexcept //!<  get the output file name
-  {
-    mOutputFilename = std::string(outputFilename);
-  }
-
-  const char* getCalibrationFileName() const noexcept {
-    return mCalibrationFilename.c_str();
-  } //!<  specifies the file containing the list of image files for int8
-    //! calibration
-  void setCalibrationFileName(
-      const char* calibrationFilename) noexcept //!<  get the int 8 calibration
-                                                //! list file name
-  {
-    mCalibrationFilename = std::string(calibrationFilename);
-  }
-
-  uint64_t getTopK() const noexcept { return mTopK; }
-  void setTopK(uint64_t topK) noexcept {
-    mTopK = topK;
-  } //!<  If this options is specified, return the K top probabilities.
-
-  float getFailurePercentage() const noexcept { return mFailurePercentage; }
-
-  void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
-
-  float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
-
-  void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
-
-  float getTolerance() const noexcept { return mTolerance; }
-
-  void setTolerance(float t) noexcept { mTolerance = t; }
-
-  const char* getTimingCacheFilename() const noexcept {
-    return mTimingCacheFilename.c_str();
-  }
-
-  void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
-    mTimingCacheFilename = std::string(timingCacheFilename);
-  }
-
-  bool isDebug() const noexcept {
-#if ONNX_DEBUG
-    return (std::getenv("ONNX_DEBUG") ? true : false);
-#else
-    return false;
-#endif
-  }
-
-  void destroy() noexcept { delete this; }
-
-}; // class SampleConfig
-
-#endif
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h
deleted file mode 100644
index cdbb08019..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TRT_SAMPLE_DEVICE_H
-#define TRT_SAMPLE_DEVICE_H
-
-#include <cassert>
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <iostream>
-#include <thread>
-
-namespace sample {
-
-inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
-  if (ret != cudaSuccess) {
-    err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
-    abort();
-  }
-}
-
-class TrtCudaEvent;
-
-namespace {
-
-void cudaSleep(void* sleep) {
-  std::this_thread::sleep_for(
-      std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
-}
-
-} // namespace
-
-//!
-//! \class TrtCudaStream
-//! \brief Managed CUDA stream
-//!
-class TrtCudaStream {
- public:
-  TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
-
-  TrtCudaStream(const TrtCudaStream&) = delete;
-
-  TrtCudaStream& operator=(const TrtCudaStream&) = delete;
-
-  TrtCudaStream(TrtCudaStream&&) = delete;
-
-  TrtCudaStream& operator=(TrtCudaStream&&) = delete;
-
-  ~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
-
-  cudaStream_t get() const { return mStream; }
-
-  void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
-
-  void wait(TrtCudaEvent& event);
-
-  void sleep(float* ms) {
-    cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
-  }
-
- private:
-  cudaStream_t mStream{};
-};
-
-//!
-//! \class TrtCudaEvent
-//! \brief Managed CUDA event
-//!
-class TrtCudaEvent {
- public:
-  explicit TrtCudaEvent(bool blocking = true) {
-    const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
-    cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
-  }
-
-  TrtCudaEvent(const TrtCudaEvent&) = delete;
-
-  TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
-
-  TrtCudaEvent(TrtCudaEvent&&) = delete;
-
-  TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
-
-  ~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); }
-
-  cudaEvent_t get() const { return mEvent; }
-
-  void record(const TrtCudaStream& stream) {
-    cudaCheck(cudaEventRecord(mEvent, stream.get()));
-  }
-
-  void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); }
-
-  // Returns time elapsed time in milliseconds
-  float operator-(const TrtCudaEvent& e) const {
-    float time{0};
-    cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
-    return time;
-  }
-
- private:
-  cudaEvent_t mEvent{};
-};
-
-inline void TrtCudaStream::wait(TrtCudaEvent& event) {
-  cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
-}
-
-//!
-//! \class TrtCudaGraph
-//! \brief Managed CUDA graph
-//!
-class TrtCudaGraph {
- public:
-  explicit TrtCudaGraph() = default;
-
-  TrtCudaGraph(const TrtCudaGraph&) = delete;
-
-  TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
-
-  TrtCudaGraph(TrtCudaGraph&&) = delete;
-
-  TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
-
-  ~TrtCudaGraph() {
-    if (mGraphExec) {
-      cudaGraphExecDestroy(mGraphExec);
-    }
-  }
-
-  void beginCapture(TrtCudaStream& stream) {
-    cudaCheck(
-        cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
-  }
-
-  bool launch(TrtCudaStream& stream) {
-    return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
-  }
-
-  void endCapture(TrtCudaStream& stream) {
-    cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
-    cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
-    cudaCheck(cudaGraphDestroy(mGraph));
-  }
-
-  void endCaptureOnError(TrtCudaStream& stream) {
-    // There are two possibilities why stream capture would fail:
-    // (1) stream is in cudaErrorStreamCaptureInvalidated state.
-    // (2) TRT reports a failure.
-    // In case (1), the returning mGraph should be nullptr.
-    // In case (2), the returning mGraph is not nullptr, but it should not be
-    // used.
-    const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
-    if (ret == cudaErrorStreamCaptureInvalidated) {
-      assert(mGraph == nullptr);
-    } else {
-      assert(ret == cudaSuccess);
-      assert(mGraph != nullptr);
-      cudaCheck(cudaGraphDestroy(mGraph));
-      mGraph = nullptr;
-    }
-    // Clean up any CUDA error.
-    cudaGetLastError();
-    sample::gLogWarning << "The CUDA graph capture on the stream has failed."
-                        << std::endl;
-  }
-
- private:
-  cudaGraph_t mGraph{};
-  cudaGraphExec_t mGraphExec{};
-};
-
-//!
-//! \class TrtCudaBuffer
-//! \brief Managed buffer for host and device
-//!
-template <typename A, typename D> class TrtCudaBuffer {
- public:
-  TrtCudaBuffer() = default;
-
-  TrtCudaBuffer(const TrtCudaBuffer&) = delete;
-
-  TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
-
-  TrtCudaBuffer(TrtCudaBuffer&& rhs) {
-    reset(rhs.mPtr);
-    rhs.mPtr = nullptr;
-  }
-
-  TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
-    if (this != &rhs) {
-      reset(rhs.mPtr);
-      rhs.mPtr = nullptr;
-    }
-    return *this;
-  }
-
-  ~TrtCudaBuffer() { reset(); }
-
-  TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
-
-  void allocate(size_t size) {
-    reset();
-    A()(&mPtr, size);
-  }
-
-  void reset(void* ptr = nullptr) {
-    if (mPtr) {
-      D()(mPtr);
-    }
-    mPtr = ptr;
-  }
-
-  void* get() const { return mPtr; }
-
- private:
-  void* mPtr{nullptr};
-};
-
-struct DeviceAllocator {
-  void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
-};
-
-struct DeviceDeallocator {
-  void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
-};
-
-struct ManagedAllocator {
-  void operator()(void** ptr, size_t size) {
-    cudaCheck(cudaMallocManaged(ptr, size));
-  }
-};
-
-struct HostAllocator {
-  void operator()(void** ptr, size_t size) {
-    cudaCheck(cudaMallocHost(ptr, size));
-  }
-};
-
-struct HostDeallocator {
-  void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
-};
-
-using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
-using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
-
-using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
-
-//!
-//! \class MirroredBuffer
-//! \brief Coupled host and device buffers
-//!
-class IMirroredBuffer {
- public:
-  //!
-  //! Allocate memory for the mirrored buffer give the size
-  //! of the allocation.
-  //!
-  virtual void allocate(size_t size) = 0;
-
-  //!
-  //! Get the pointer to the device side buffer.
-  //!
-  //! \return pointer to device memory or nullptr if uninitialized.
-  //!
-  virtual void* getDeviceBuffer() const = 0;
-
-  //!
-  //! Get the pointer to the host side buffer.
-  //!
-  //! \return pointer to host memory or nullptr if uninitialized.
-  //!
-  virtual void* getHostBuffer() const = 0;
-
-  //!
-  //! Copy the memory from host to device.
-  //!
-  virtual void hostToDevice(TrtCudaStream& stream) = 0;
-
-  //!
-  //! Copy the memory from device to host.
-  //!
-  virtual void deviceToHost(TrtCudaStream& stream) = 0;
-
-  //!
-  //! Interface to get the size of the memory
-  //!
-  //! \return the size of memory allocated.
-  //!
-  virtual size_t getSize() const = 0;
-
-  //!
-  //! Virtual destructor declaraion
-  //!
-  virtual ~IMirroredBuffer() = default;
-
-}; // class IMirroredBuffer
-
-//!
-//! Class to have a seperate memory buffer for discrete device and host
-//! allocations.
-//!
-class DiscreteMirroredBuffer : public IMirroredBuffer {
- public:
-  void allocate(size_t size) {
-    mSize = size;
-    mHostBuffer.allocate(size);
-    mDeviceBuffer.allocate(size);
-  }
-
-  void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
-
-  void* getHostBuffer() const { return mHostBuffer.get(); }
-
-  void hostToDevice(TrtCudaStream& stream) {
-    cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
-                              cudaMemcpyHostToDevice, stream.get()));
-  }
-
-  void deviceToHost(TrtCudaStream& stream) {
-    cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
-                              cudaMemcpyDeviceToHost, stream.get()));
-  }
-
-  size_t getSize() const { return mSize; }
-
- private:
-  size_t mSize{0};
-  TrtHostBuffer mHostBuffer;
-  TrtDeviceBuffer mDeviceBuffer;
-}; // class DiscreteMirroredBuffer
-
-//!
-//! Class to have a unified memory buffer for embedded devices.
-//!
-class UnifiedMirroredBuffer : public IMirroredBuffer {
- public:
-  void allocate(size_t size) {
-    mSize = size;
-    mBuffer.allocate(size);
-  }
-
-  void* getDeviceBuffer() const { return mBuffer.get(); }
-
-  void* getHostBuffer() const { return mBuffer.get(); }
-
-  void hostToDevice(TrtCudaStream& stream) {
-    // Does nothing since we are using unified memory.
-  }
-
-  void deviceToHost(TrtCudaStream& stream) {
-    // Does nothing since we are using unified memory.
-  }
-
-  size_t getSize() const { return mSize; }
-
- private:
-  size_t mSize{0};
-  TrtManagedBuffer mBuffer;
-}; // class UnifiedMirroredBuffer
-
-inline void setCudaDevice(int device, std::ostream& os) {
-  cudaCheck(cudaSetDevice(device));
-
-  cudaDeviceProp properties;
-  cudaCheck(cudaGetDeviceProperties(&properties, device));
-
-  // clang-format off
-    os << "=== Device Information ===" << std::endl;
-    os << "Selected Device: "      << properties.name                                               << std::endl;
-    os << "Compute Capability: "   << properties.major << "." << properties.minor                   << std::endl;
-    os << "SMs: "                  << properties.multiProcessorCount                                << std::endl;
-    os << "Compute Clock Rate: "   << properties.clockRate / 1000000.0F << " GHz"                   << std::endl;
-    os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB"                   << std::endl;
-    os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB"       << std::endl;
-    os << "Memory Bus Width: "     << properties.memoryBusWidth << " bits"
-                        << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
-    os << "Memory Clock Rate: "    << properties.memoryClockRate / 1000000.0F << " GHz"             << std::endl;
-  // clang-format on
-}
-
-} // namespace sample
-
-#endif // TRT_SAMPLE_DEVICE_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp
deleted file mode 100644
index 6c1ab35b1..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp
+++ /dev/null
@@ -1,1710 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <random>
-#include <set>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-//#include "NvCaffeParser.h"
-#include "NvInfer.h"
-#include "NvOnnxParser.h"
-
-#include "ErrorRecorder.h"
-#include "common.h"
-#include "half.h"
-#include "logger.h"
-#include "sampleEngines.h"
-#include "sampleOptions.h"
-#include "sampleUtils.h"
-
-#if !defined(_WIN32)
-#include <dlfcn.h>
-#endif
-
-using namespace nvinfer1;
-
-namespace sample {
-
-namespace {
-
-//struct CaffeBufferShutter {
-//  ~CaffeBufferShutter() { nvcaffeparser1::shutdownProtobufLibrary(); }
-//};
-
-std::map<std::string, float>
-readScalesFromCalibrationCache(const std::string& calibrationFile) {
-  std::map<std::string, float> tensorScales;
-  std::ifstream cache{calibrationFile};
-  if (!cache.is_open()) {
-    sample::gLogError << "[TRT] Can not open provided calibration cache file"
-                      << std::endl;
-    return tensorScales;
-  }
-  std::string line;
-  while (std::getline(cache, line)) {
-    auto colonPos = line.find_last_of(':');
-    if (colonPos != std::string::npos) {
-      // Scales should be stored in calibration cache as 32-bit floating numbers
-      // encoded as 32-bit integers
-      int32_t scalesAsInt =
-          std::stoi(line.substr(colonPos + 2, 8), nullptr, 16);
-      const auto tensorName = line.substr(0, colonPos);
-      tensorScales[tensorName] = *reinterpret_cast<float*>(&scalesAsInt);
-    }
-  }
-  cache.close();
-  return tensorScales;
-}
-} // namespace
-
-void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
-                                    const std::vector<IOFormat>& inputFormats,
-                                    const std::vector<IOFormat>& outputFormats,
-                                    const std::string& calibrationFile) {
-  const auto tensorScales = readScalesFromCalibrationCache(calibrationFile);
-  const bool broadcastInputFormats =
-      broadcastIOFormats(inputFormats, network.getNbInputs());
-  for (int32_t i = 0, n = network.getNbInputs(); i < n; ++i) {
-    int32_t formatIdx = broadcastInputFormats ? 0 : i;
-    if (!inputFormats.empty() &&
-        inputFormats[formatIdx].first == DataType::kINT8) {
-      auto* input = network.getInput(i);
-      const auto calibScale = tensorScales.at(input->getName());
-      input->setDynamicRange(-127 * calibScale, 127 * calibScale);
-    }
-  }
-  const bool broadcastOutputFormats =
-      broadcastIOFormats(outputFormats, network.getNbInputs());
-  for (int32_t i = 0, n = network.getNbOutputs(); i < n; ++i) {
-    int32_t formatIdx = broadcastOutputFormats ? 0 : i;
-    if (!outputFormats.empty() &&
-        outputFormats[formatIdx].first == DataType::kINT8) {
-      auto* output = network.getOutput(i);
-      const auto calibScale = tensorScales.at(output->getName());
-      output->setDynamicRange(-127 * calibScale, 127 * calibScale);
-    }
-  }
-}
-
-#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err)                       \
-  {                                                                            \
-    if ((condition) == false) {                                                \
-      (err) << (msg) << std::endl;                                             \
-      return retval;                                                           \
-    }                                                                          \
-  }
-
-Parser modelToNetwork(const ModelOptions& model,
-                      nvinfer1::INetworkDefinition& network,
-                      std::ostream& err) {
-  sample::gLogInfo << "Start parsing network model" << std::endl;
-  Parser parser;
-  const std::string& modelName = model.baseModel.model;
-  switch (model.baseModel.format) {
-/*
-  case ModelFormat::kCAFFE: {
-    using namespace nvcaffeparser1;
-    parser.caffeParser.reset(createCaffeParser());
-    CaffeBufferShutter bufferShutter;
-    const auto* const blobNameToTensor = parser.caffeParser->parse(
-        model.prototxt.c_str(), modelName.empty() ? nullptr : modelName.c_str(),
-        network, DataType::kFLOAT);
-    if (!blobNameToTensor) {
-      err << "Failed to parse caffe model or prototxt, tensors blob not found"
-          << std::endl;
-      parser.caffeParser.reset();
-      break;
-    }
-
-    for (const auto& s : model.outputs) {
-      if (blobNameToTensor->find(s.c_str()) == nullptr) {
-        err << "Could not find output blob " << s << std::endl;
-        parser.caffeParser.reset();
-        break;
-      }
-      network.markOutput(*blobNameToTensor->find(s.c_str()));
-    }
-    break;
-  }
-*/
-  case ModelFormat::kONNX: {
-    using namespace nvonnxparser;
-    parser.onnxParser.reset(
-        createParser(network, sample::gLogger.getTRTLogger()));
-    if (!parser.onnxParser->parseFromFile(
-            model.baseModel.model.c_str(),
-            static_cast<int>(sample::gLogger.getReportableSeverity()))) {
-      err << "Failed to parse onnx file" << std::endl;
-      parser.onnxParser.reset();
-    }
-    break;
-  }
-  case ModelFormat::kANY:
-    break;
-  }
-
-  sample::gLogInfo << "Finish parsing network model" << std::endl;
-  return parser;
-}
-
-namespace {
-
-class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 {
- public:
-  RndInt8Calibrator(int batches, std::vector<int64_t>& elemCount,
-                    const std::string& cacheFile,
-                    const nvinfer1::INetworkDefinition& network,
-                    std::ostream& err);
-
-  ~RndInt8Calibrator() {
-    for (auto& elem : mInputDeviceBuffers) {
-      cudaCheck(cudaFree(elem.second), mErr);
-    }
-  }
-
-  bool getBatch(void* bindings[], const char* names[],
-                int nbBindings) noexcept override;
-
-  int getBatchSize() const noexcept override { return 1; }
-
-  const void* readCalibrationCache(size_t& length) noexcept override;
-
-  virtual void writeCalibrationCache(const void*, size_t) noexcept override {}
-
- private:
-  int mBatches{};
-  int mCurrentBatch{};
-  std::string mCacheFile;
-  std::map<std::string, void*> mInputDeviceBuffers;
-  std::vector<char> mCalibrationCache;
-  std::ostream& mErr;
-};
-
-RndInt8Calibrator::RndInt8Calibrator(int batches,
-                                     std::vector<int64_t>& elemCount,
-                                     const std::string& cacheFile,
-                                     const INetworkDefinition& network,
-                                     std::ostream& err)
-    : mBatches(batches), mCurrentBatch(0), mCacheFile(cacheFile), mErr(err) {
-  std::ifstream tryCache(cacheFile, std::ios::binary);
-  if (tryCache.good()) {
-    return;
-  }
-
-  std::default_random_engine generator;
-  std::uniform_real_distribution<float> distribution(-1.0F, 1.0F);
-  auto gen = [&generator, &distribution]() { return distribution(generator); };
-
-  for (int i = 0; i < network.getNbInputs(); i++) {
-    auto* input = network.getInput(i);
-    std::vector<float> rnd_data(elemCount[i]);
-    std::generate_n(rnd_data.begin(), elemCount[i], gen);
-
-    void* data;
-    cudaCheck(cudaMalloc(&data, elemCount[i] * sizeof(float)), mErr);
-    cudaCheck(cudaMemcpy(data, rnd_data.data(), elemCount[i] * sizeof(float),
-                         cudaMemcpyHostToDevice),
-              mErr);
-
-    mInputDeviceBuffers.insert(std::make_pair(input->getName(), data));
-  }
-}
-
-bool RndInt8Calibrator::getBatch(void* bindings[], const char* names[],
-                                 int nbBindings) noexcept {
-  if (mCurrentBatch >= mBatches) {
-    return false;
-  }
-
-  for (int i = 0; i < nbBindings; ++i) {
-    bindings[i] = mInputDeviceBuffers[names[i]];
-  }
-
-  ++mCurrentBatch;
-
-  return true;
-}
-
-const void* RndInt8Calibrator::readCalibrationCache(size_t& length) noexcept {
-  mCalibrationCache.clear();
-  std::ifstream input(mCacheFile, std::ios::binary);
-  input >> std::noskipws;
-  if (input.good()) {
-    std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
-              std::back_inserter(mCalibrationCache));
-  }
-
-  length = mCalibrationCache.size();
-  return !mCalibrationCache.empty() ? mCalibrationCache.data() : nullptr;
-}
-
-bool setTensorDynamicRange(const INetworkDefinition& network,
-                           float inRange = 2.0F, float outRange = 4.0F) {
-  // Ensure that all layer inputs have a dynamic range.
-  for (int l = 0; l < network.getNbLayers(); l++) {
-    auto* layer = network.getLayer(l);
-    for (int i = 0; i < layer->getNbInputs(); i++) {
-      ITensor* input{layer->getInput(i)};
-      // Optional inputs are nullptr here and are from RNN layers.
-      if (input && !input->dynamicRangeIsSet()) {
-        // Concat should propagate dynamic range from outputs to inputs to avoid
-        // Re-quantization during the concatenation
-        auto dynRange = (layer->getType() == LayerType::kCONCATENATION)
-                            ? outRange
-                            : inRange;
-        if (!input->setDynamicRange(-dynRange, dynRange)) {
-          return false;
-        }
-      }
-    }
-    for (int o = 0; o < layer->getNbOutputs(); o++) {
-      ITensor* output{layer->getOutput(o)};
-      // Optional outputs are nullptr here and are from RNN layers.
-      if (output && !output->dynamicRangeIsSet()) {
-        // Pooling must have the same input and output dynamic range.
-        if (layer->getType() == LayerType::kPOOLING) {
-          if (!output->setDynamicRange(-inRange, inRange)) {
-            return false;
-          }
-        } else {
-          if (!output->setDynamicRange(-outRange, outRange)) {
-            return false;
-          }
-        }
-      }
-    }
-  }
-  return true;
-}
-
-// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0.
-template <typename T>
-void sparsify(const T* values, int64_t count, int32_t k, int32_t rs,
-              std::vector<char>& sparseWeights) {
-  const auto c = count / (k * rs);
-  sparseWeights.resize(count * sizeof(T));
-  auto* sparseValues = reinterpret_cast<T*>(sparseWeights.data());
-
-  constexpr int32_t window = 4;
-  constexpr int32_t nonzeros = 2;
-
-  const int32_t crs = c * rs;
-  const auto getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) {
-    return ki * crs + ci * rs + rsi;
-  };
-
-  for (int64_t ki = 0; ki < k; ++ki) {
-    for (int64_t rsi = 0; rsi < rs; ++rsi) {
-      int32_t w = 0;
-      int32_t nz = 0;
-      for (int64_t ci = 0; ci < c; ++ci) {
-        const auto index = getIndex(ki, ci, rsi);
-        if (nz < nonzeros) {
-          sparseValues[index] = values[index];
-          ++nz;
-        } else {
-          sparseValues[index] = 0;
-        }
-        if (++w == window) {
-          w = 0;
-          nz = 0;
-        }
-      }
-    }
-  }
-}
-
-void sparsify(const Weights& weights, int32_t k, int32_t rs,
-              std::vector<char>& sparseWeights) {
-  switch (weights.type) {
-  case DataType::kFLOAT:
-    sparsify(static_cast<const float*>(weights.values), weights.count, k, rs,
-             sparseWeights);
-    break;
-  case DataType::kHALF:
-    sparsify(static_cast<const half_float::half*>(weights.values),
-             weights.count, k, rs, sparseWeights);
-    break;
-  case DataType::kINT8:
-  case DataType::kINT32:
-  case DataType::kBOOL:
-    break;
-  }
-}
-
-template <typename L>
-void setSparseWeights(L& l, int32_t k, int32_t rs,
-                      std::vector<char>& sparseWeights) {
-  auto weights = l.getKernelWeights();
-  sparsify(weights, k, rs, sparseWeights);
-  weights.values = sparseWeights.data();
-  l.setKernelWeights(weights);
-}
-
-template <typename T>
-void transpose2DWeights(void* dst, void const* src, int32_t const m,
-                        int32_t const n) {
-  ASSERT(dst != src);
-  T* tdst = reinterpret_cast<T*>(dst);
-  T const* tsrc = reinterpret_cast<T const*>(src);
-  for (int32_t mi = 0; mi < m; ++mi) {
-    for (int32_t ni = 0; ni < n; ++ni) {
-      int32_t const isrc = mi * n + ni;
-      int32_t const idst = ni * m + mi;
-      tdst[idst] = tsrc[isrc];
-    }
-  }
-}
-
-// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle
-// layers.
-// Forward analysis on the API graph to determine which weights to sparsify.
-void sparsifyMatMulKernelWeights(
-    INetworkDefinition& network,
-    std::vector<std::vector<char>>& sparseWeights) {
-  using TensorToLayer = std::unordered_map<ITensor*, ILayer*>;
-  using LayerToTensor = std::unordered_map<ILayer*, ITensor*>;
-
-  // 1. Collect layers and tensors information from the network.
-  TensorToLayer matmulI2L;
-  TensorToLayer constO2L;
-  TensorToLayer shuffleI2L;
-  LayerToTensor shuffleL2O;
-  auto collectMappingInfo = [&](int32_t const idx) {
-    ILayer* l = network.getLayer(idx);
-    switch (l->getType()) {
-    case LayerType::kMATRIX_MULTIPLY: {
-      // assume weights on the second input.
-      matmulI2L.insert({l->getInput(1), l});
-      break;
-    }
-    case LayerType::kCONSTANT: {
-      DataType const dtype = static_cast<IConstantLayer*>(l)->getWeights().type;
-      if (dtype == DataType::kFLOAT || dtype == DataType::kHALF) {
-        // Sparsify float only.
-        constO2L.insert({l->getOutput(0), l});
-      }
-      break;
-    }
-    case LayerType::kSHUFFLE: {
-      shuffleI2L.insert({l->getInput(0), l});
-      shuffleL2O.insert({l, l->getOutput(0)});
-      break;
-    }
-    default:
-      break;
-    }
-  };
-  int32_t const nbLayers = network.getNbLayers();
-  for (int32_t i = 0; i < nbLayers; ++i) {
-    collectMappingInfo(i);
-  }
-  if (matmulI2L.size() == 0 || constO2L.size() == 0) {
-    // No MatrixMultiply or Constant layer found, no weights to sparsify.
-    return;
-  }
-
-  // Helper for analysis
-  auto isTranspose = [](Permutation const& perm) -> bool {
-    return (perm.order[0] == 1 && perm.order[1] == 0);
-  };
-  auto is2D = [](Dims const& dims) -> bool { return dims.nbDims == 2; };
-  auto isIdenticalReshape = [](Dims const& dims) -> bool {
-    for (int32_t i = 0; i < dims.nbDims; ++i) {
-      if (dims.d[i] != i || dims.d[i] != -1) {
-        return false;
-      }
-    }
-    return true;
-  };
-  auto tensorReachedViaTranspose = [&](ITensor* t,
-                                       bool& needTranspose) -> ITensor* {
-    while (shuffleI2L.find(t) != shuffleI2L.end()) {
-      IShuffleLayer* s = static_cast<IShuffleLayer*>(shuffleI2L.at(t));
-      if (!is2D(s->getInput(0)->getDimensions()) ||
-          !is2D(s->getReshapeDimensions()) ||
-          !isIdenticalReshape(s->getReshapeDimensions())) {
-        break;
-      }
-
-      if (isTranspose(s->getFirstTranspose())) {
-        needTranspose = !needTranspose;
-      }
-      if (isTranspose(s->getSecondTranspose())) {
-        needTranspose = !needTranspose;
-      }
-
-      t = shuffleL2O.at(s);
-    }
-    return t;
-  };
-
-  // 2. Forward analysis to collect the Constant layers connected to MatMul via
-  // Transpose
-  std::unordered_map<IConstantLayer*, bool> constantLayerToSparse;
-  for (auto& o2l : constO2L) {
-    // If need to transpose the weights of the Constant layer.
-    // Need to transpose by default due to semantic difference.
-    bool needTranspose{true};
-    ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose);
-    if (matmulI2L.find(t) == matmulI2L.end()) {
-      continue;
-    }
-
-    // check MatMul params...
-    IMatrixMultiplyLayer* mm =
-        static_cast<IMatrixMultiplyLayer*>(matmulI2L.at(t));
-    bool const twoInputs = mm->getNbInputs() == 2;
-    bool const all2D = is2D(mm->getInput(0)->getDimensions()) &&
-                       is2D(mm->getInput(1)->getDimensions());
-    bool const isSimple = mm->getOperation(0) == MatrixOperation::kNONE &&
-                          mm->getOperation(1) != MatrixOperation::kVECTOR;
-    if (!(twoInputs && all2D && isSimple)) {
-      continue;
-    }
-    if (mm->getOperation(1) == MatrixOperation::kTRANSPOSE) {
-      needTranspose = !needTranspose;
-    }
-
-    constantLayerToSparse.insert(
-        {static_cast<IConstantLayer*>(o2l.second), needTranspose});
-  }
-
-  // 3. Finally, sparsify the weights
-  auto sparsifyConstantWeights = [&sparseWeights](IConstantLayer* layer,
-                                                  bool const needTranspose) {
-    Dims dims = layer->getOutput(0)->getDimensions();
-    ASSERT(dims.nbDims == 2);
-    int32_t const idxN = needTranspose ? 1 : 0;
-    int32_t const n = dims.d[idxN];
-    int32_t const k = dims.d[1 - idxN];
-    sparseWeights.emplace_back();
-    std::vector<char>& spw = sparseWeights.back();
-    Weights w = layer->getWeights();
-    DataType const dtype = w.type;
-    ASSERT(dtype == DataType::kFLOAT ||
-           dtype ==
-               DataType::kHALF); // non-float weights should have been ignored.
-
-    if (needTranspose) {
-      if (dtype == DataType::kFLOAT) {
-        spw.resize(w.count * sizeof(float));
-        transpose2DWeights<float>(spw.data(), w.values, k, n);
-      } else if (dtype == DataType::kHALF) {
-        spw.resize(w.count * sizeof(half_float::half));
-        transpose2DWeights<half_float::half>(spw.data(), w.values, k, n);
-      }
-
-      w.values = spw.data();
-      std::vector<char> tmpW;
-      sparsify(w, n, 1, tmpW);
-
-      if (dtype == DataType::kFLOAT) {
-        transpose2DWeights<float>(spw.data(), tmpW.data(), n, k);
-      } else if (dtype == DataType::kHALF) {
-        transpose2DWeights<half_float::half>(spw.data(), tmpW.data(), n, k);
-      }
-    } else {
-      sparsify(w, n, 1, spw);
-    }
-
-    w.values = spw.data();
-    layer->setWeights(w);
-  };
-  for (auto& l : constantLayerToSparse) {
-    sparsifyConstantWeights(l.first, l.second);
-  }
-}
-
-void sparsify(INetworkDefinition& network,
-              std::vector<std::vector<char>>& sparseWeights) {
-  for (int32_t l = 0; l < network.getNbLayers(); ++l) {
-    auto* layer = network.getLayer(l);
-    const auto t = layer->getType();
-    if (t == LayerType::kCONVOLUTION) {
-      auto& conv = *static_cast<IConvolutionLayer*>(layer);
-      const auto& dims = conv.getKernelSizeNd();
-      if (dims.nbDims > 2) {
-        continue;
-      }
-      const auto k = conv.getNbOutputMaps();
-      const auto rs = dims.d[0] * dims.d[1];
-      sparseWeights.emplace_back();
-      setSparseWeights(conv, k, rs, sparseWeights.back());
-    } else if (t == LayerType::kFULLY_CONNECTED) {
-      auto& fc = *static_cast<IFullyConnectedLayer*>(layer);
-      const auto k = fc.getNbOutputChannels();
-      sparseWeights.emplace_back();
-      setSparseWeights(fc, k, 1, sparseWeights.back());
-    }
-  }
-
-  sparsifyMatMulKernelWeights(network, sparseWeights);
-}
-
-void setLayerPrecisions(INetworkDefinition& network,
-                        LayerPrecisions const& layerPrecisions) {
-  bool const hasGlobalPrecision{layerPrecisions.find("*") !=
-                                layerPrecisions.end()};
-  auto const globalPrecision =
-      hasGlobalPrecision ? layerPrecisions.at("*") : nvinfer1::DataType::kFLOAT;
-  bool hasLayerPrecisionSkipped{false};
-  for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) {
-    auto* layer = network.getLayer(layerIdx);
-    auto const layerName = layer->getName();
-    if (layerPrecisions.find(layer->getName()) != layerPrecisions.end()) {
-      layer->setPrecision(layerPrecisions.at(layer->getName()));
-    } else if (hasGlobalPrecision) {
-      // We should not set the layer precision if its default precision is INT32
-      // or Bool.
-      if (layer->getPrecision() == nvinfer1::DataType::kINT32 ||
-          layer->getPrecision() == nvinfer1::DataType::kBOOL) {
-        hasLayerPrecisionSkipped = true;
-        sample::gLogVerbose << "Skipped setting precision for layer "
-                            << layerName << " because the "
-                            << " default layer precision is INT32 or Bool."
-                            << std::endl;
-        continue;
-      }
-      // We should not set the constant layer precision if its weights are in
-      // INT32.
-      if (layer->getType() == nvinfer1::LayerType::kCONSTANT &&
-          static_cast<IConstantLayer*>(layer)->getWeights().type ==
-              nvinfer1::DataType::kINT32) {
-        hasLayerPrecisionSkipped = true;
-        sample::gLogVerbose << "Skipped setting precision for layer "
-                            << layerName << " because this "
-                            << "constant layer has INT32 weights." << std::endl;
-        continue;
-      }
-      // We should not set the layer precision if the layer operates on a shape
-      // tensor.
-      if (layer->getNbInputs() >= 1 && layer->getInput(0)->isShapeTensor()) {
-        hasLayerPrecisionSkipped = true;
-        sample::gLogVerbose << "Skipped setting precision for layer "
-                            << layerName << " because this layer "
-                            << "operates on a shape tensor." << std::endl;
-        continue;
-      }
-      if ((layer->getType() == nvinfer1::LayerType::kIDENTITY ||
-           layer->getType() == nvinfer1::LayerType::kSHUFFLE) &&
-          layer->getNbInputs() >= 1 &&
-          layer->getInput(0)->getType() == nvinfer1::DataType::kINT32 &&
-          layer->getNbOutputs() >= 1 &&
-          layer->getOutput(0)->getType() == nvinfer1::DataType::kINT32) {
-        hasLayerPrecisionSkipped = true;
-        sample::gLogVerbose << "Skipped setting precision for layer "
-                            << layerName << " because this "
-                            << "layer has INT32 input and output." << std::endl;
-        continue;
-      }
-      // All heuristics passed. Set the layer precision.
-      layer->setPrecision(globalPrecision);
-    }
-  }
-
-  if (hasLayerPrecisionSkipped) {
-    sample::gLogInfo << "Skipped setting precisions for some layers. Check "
-                        "verbose logs for more details."
-                     << std::endl;
-  }
-}
-
-void setLayerOutputTypes(INetworkDefinition& network,
-                         LayerOutputTypes const& layerOutputTypes) {
-  bool const hasGlobalOutputType{layerOutputTypes.find("*") !=
-                                 layerOutputTypes.end()};
-  auto const globalOutputType = hasGlobalOutputType
-                                    ? layerOutputTypes.at("*").at(0)
-                                    : nvinfer1::DataType::kFLOAT;
-  bool hasLayerOutputTypeSkipped{false};
-  for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) {
-    auto* layer = network.getLayer(layerIdx);
-    auto const layerName = layer->getName();
-    auto const nbOutputs = layer->getNbOutputs();
-    if (layerOutputTypes.find(layer->getName()) != layerOutputTypes.end()) {
-      auto const& outputTypes = layerOutputTypes.at(layer->getName());
-      bool const isBroadcast = (outputTypes.size() == 1);
-      if (!isBroadcast &&
-          static_cast<int32_t>(outputTypes.size()) != nbOutputs) {
-        sample::gLogError
-            << "Layer " << layerName << " has " << nbOutputs << " outputs but "
-            << outputTypes.size()
-            << " output types are given in --layerOutputTypes flag."
-            << std::endl;
-        throw std::invalid_argument("Invalid --layerOutputTypes flag.");
-      }
-      for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) {
-        layer->setOutputType(outputIdx,
-                             outputTypes.at(isBroadcast ? 0 : outputIdx));
-      }
-    } else if (hasGlobalOutputType) {
-      // We should not set the layer output types if its default precision is
-      // INT32 or Bool.
-      if (layer->getPrecision() == nvinfer1::DataType::kINT32 ||
-          layer->getPrecision() == nvinfer1::DataType::kBOOL) {
-        hasLayerOutputTypeSkipped = true;
-        sample::gLogVerbose << "Skipped setting output types for layer "
-                            << layerName << " because the "
-                            << " default layer precision is INT32 or Bool."
-                            << std::endl;
-        continue;
-      }
-      // We should not set the constant layer output types if its weights are in
-      // INT32.
-      if (layer->getType() == nvinfer1::LayerType::kCONSTANT &&
-          static_cast<IConstantLayer*>(layer)->getWeights().type ==
-              nvinfer1::DataType::kINT32) {
-        hasLayerOutputTypeSkipped = true;
-        sample::gLogVerbose << "Skipped setting output types for layer "
-                            << layerName << " because this "
-                            << "constant layer has INT32 weights." << std::endl;
-        continue;
-      }
-      for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) {
-        // We should not set the output type if the output is a shape tensor.
-        if (layer->getOutput(0)->isShapeTensor()) {
-          hasLayerOutputTypeSkipped = true;
-          sample::gLogVerbose << "Skipped setting output type for output "
-                              << outputIdx << " of layer " << layerName
-                              << " because it is a shape tensor." << std::endl;
-          continue;
-        }
-        layer->setOutputType(outputIdx, globalOutputType);
-      }
-    }
-  }
-
-  if (hasLayerOutputTypeSkipped) {
-    sample::gLogInfo << "Skipped setting output types for some layers. Check "
-                        "verbose logs for more details."
-                     << std::endl;
-  }
-}
-
-void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) {
-  auto const roundToBytes = [](double const sizeInMB) {
-    return static_cast<size_t>(sizeInMB * (1 << 20));
-  };
-  if (build.workspace >= 0) {
-    config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE,
-                              roundToBytes(build.workspace));
-  }
-  if (build.dlaSRAM >= 0) {
-    config.setMemoryPoolLimit(MemoryPoolType::kDLA_MANAGED_SRAM,
-                              roundToBytes(build.dlaSRAM));
-  }
-  if (build.dlaLocalDRAM >= 0) {
-    config.setMemoryPoolLimit(MemoryPoolType::kDLA_LOCAL_DRAM,
-                              roundToBytes(build.dlaLocalDRAM));
-  }
-  if (build.dlaGlobalDRAM >= 0) {
-    config.setMemoryPoolLimit(MemoryPoolType::kDLA_GLOBAL_DRAM,
-                              roundToBytes(build.dlaGlobalDRAM));
-  }
-}
-
-} // namespace
-
-bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
-                           IBuilder& builder, INetworkDefinition& network,
-                           IBuilderConfig& config, std::ostream& err,
-                           std::vector<std::vector<char>>& sparseWeights) {
-  IOptimizationProfile* profile{nullptr};
-  if (build.maxBatch) {
-    builder.setMaxBatchSize(build.maxBatch);
-  } else {
-    profile = builder.createOptimizationProfile();
-  }
-
-  bool hasDynamicShapes{false};
-
-  bool broadcastInputFormats =
-      broadcastIOFormats(build.inputFormats, network.getNbInputs());
-
-  if (profile) {
-    // Check if the provided input tensor names match the input tensors of the
-    // engine.
-    // Throw an error if the provided input tensor names cannot be found because
-    // it implies a potential typo.
-    for (const auto& shape : build.shapes) {
-      bool tensorNameFound{false};
-      for (int32_t i = 0; i < network.getNbInputs(); ++i) {
-        if (network.getInput(i)->getName() == shape.first) {
-          tensorNameFound = true;
-          break;
-        }
-      }
-      if (!tensorNameFound) {
-        sample::gLogError
-            << "Cannot find input tensor with name \"" << shape.first
-            << "\" in the network "
-            << "inputs! Please make sure the input tensor names are correct."
-            << std::endl;
-        return false;
-      }
-    }
-  }
-
-  for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) {
-    // Set formats and data types of inputs
-    auto* input = network.getInput(i);
-    if (!build.inputFormats.empty()) {
-      int inputFormatIndex = broadcastInputFormats ? 0 : i;
-      input->setType(build.inputFormats[inputFormatIndex].first);
-      input->setAllowedFormats(build.inputFormats[inputFormatIndex].second);
-    } else {
-      switch (input->getType()) {
-      case DataType::kINT32:
-      case DataType::kBOOL:
-      case DataType::kHALF:
-        // Leave these as is.
-        break;
-      case DataType::kFLOAT:
-      case DataType::kINT8:
-        // User did not specify a floating-point format.  Default to kFLOAT.
-        input->setType(DataType::kFLOAT);
-        break;
-      }
-      input->setAllowedFormats(1U << static_cast<int>(TensorFormat::kLINEAR));
-    }
-
-    if (profile) {
-      auto const dims = input->getDimensions();
-      auto const isScalar = dims.nbDims == 0;
-      auto const isDynamicInput =
-          std::any_of(dims.d, dims.d + dims.nbDims,
-                      [](int32_t dim) { return dim == -1; }) ||
-          input->isShapeTensor();
-      if (isDynamicInput) {
-        hasDynamicShapes = true;
-        auto shape = build.shapes.find(input->getName());
-        ShapeRange shapes{};
-
-        // If no shape is provided, set dynamic dimensions to 1.
-        if (shape == build.shapes.end()) {
-          constexpr int DEFAULT_DIMENSION = 1;
-          std::vector<int> staticDims;
-          if (input->isShapeTensor()) {
-            if (isScalar) {
-              staticDims.push_back(1);
-            } else {
-              staticDims.resize(dims.d[0]);
-              std::fill(staticDims.begin(), staticDims.end(),
-                        DEFAULT_DIMENSION);
-            }
-          } else {
-            staticDims.resize(dims.nbDims);
-            std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
-                           [&](int dimension) {
-                             return dimension > 0 ? dimension
-                                                  : DEFAULT_DIMENSION;
-                           });
-          }
-          sample::gLogWarning
-              << "Dynamic dimensions required for input: " << input->getName()
-              << ", but no shapes were provided. Automatically overriding "
-                 "shape to: "
-              << staticDims << std::endl;
-          std::fill(shapes.begin(), shapes.end(), staticDims);
-        } else {
-          shapes = shape->second;
-        }
-
-        std::vector<int> profileDims{};
-        if (input->isShapeTensor()) {
-          profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMIN)];
-          SMP_RETVAL_IF_FALSE(profile->setShapeValues(
-                                  input->getName(), OptProfileSelector::kMIN,
-                                  profileDims.data(),
-                                  static_cast<int>(profileDims.size())),
-                              "Error in set shape values MIN", false, err);
-          profileDims = shapes[static_cast<size_t>(OptProfileSelector::kOPT)];
-          SMP_RETVAL_IF_FALSE(profile->setShapeValues(
-                                  input->getName(), OptProfileSelector::kOPT,
-                                  profileDims.data(),
-                                  static_cast<int>(profileDims.size())),
-                              "Error in set shape values OPT", false, err);
-          profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMAX)];
-          SMP_RETVAL_IF_FALSE(profile->setShapeValues(
-                                  input->getName(), OptProfileSelector::kMAX,
-                                  profileDims.data(),
-                                  static_cast<int>(profileDims.size())),
-                              "Error in set shape values MAX", false, err);
-        } else {
-          profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMIN)];
-          SMP_RETVAL_IF_FALSE(
-              profile->setDimensions(input->getName(), OptProfileSelector::kMIN,
-                                     toDims(profileDims)),
-              "Error in set dimensions to profile MIN", false, err);
-          profileDims = shapes[static_cast<size_t>(OptProfileSelector::kOPT)];
-          SMP_RETVAL_IF_FALSE(
-              profile->setDimensions(input->getName(), OptProfileSelector::kOPT,
-                                     toDims(profileDims)),
-              "Error in set dimensions to profile OPT", false, err);
-          profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMAX)];
-          SMP_RETVAL_IF_FALSE(
-              profile->setDimensions(input->getName(), OptProfileSelector::kMAX,
-                                     toDims(profileDims)),
-              "Error in set dimensions to profile MAX", false, err);
-        }
-      }
-    }
-  }
-
-  if (!hasDynamicShapes && !build.shapes.empty()) {
-    sample::gLogError << "Static model does not take explicit shapes since the "
-                         "shape of inference tensors will be "
-                         "determined by the model itself"
-                      << std::endl;
-    return false;
-  }
-
-  if (profile && hasDynamicShapes) {
-    SMP_RETVAL_IF_FALSE(profile->isValid(),
-                        "Required optimization profile is invalid", false, err);
-    SMP_RETVAL_IF_FALSE(config.addOptimizationProfile(profile) != -1,
-                        "Error in add optimization profile", false, err);
-  }
-
-  bool broadcastOutputFormats =
-      broadcastIOFormats(build.outputFormats, network.getNbOutputs(), false);
-
-  for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++) {
-    // Set formats and data types of outputs
-    auto* output = network.getOutput(i);
-    if (!build.outputFormats.empty()) {
-      int outputFormatIndex = broadcastOutputFormats ? 0 : i;
-      output->setType(build.outputFormats[outputFormatIndex].first);
-      output->setAllowedFormats(build.outputFormats[outputFormatIndex].second);
-    } else {
-      output->setAllowedFormats(1U << static_cast<int>(TensorFormat::kLINEAR));
-    }
-  }
-
-  setMemoryPoolLimits(config, build);
-
-  if (build.timingCacheMode == TimingCacheMode::kDISABLE) {
-    config.setFlag(BuilderFlag::kDISABLE_TIMING_CACHE);
-  }
-
-  if (!build.tf32) {
-    config.clearFlag(BuilderFlag::kTF32);
-  }
-
-  if (build.refittable) {
-    config.setFlag(BuilderFlag::kREFIT);
-  }
-
-  if (build.sparsity != SparsityFlag::kDISABLE) {
-    config.setFlag(BuilderFlag::kSPARSE_WEIGHTS);
-    if (build.sparsity == SparsityFlag::kFORCE) {
-      sparsify(network, sparseWeights);
-    }
-  }
-
-  config.setProfilingVerbosity(build.profilingVerbosity);
-  config.setMinTimingIterations(build.minTiming);
-  config.setAvgTimingIterations(build.avgTiming);
-
-  if (build.fp16) {
-    config.setFlag(BuilderFlag::kFP16);
-  }
-
-  if (build.int8) {
-    config.setFlag(BuilderFlag::kINT8);
-  }
-
-  if (build.int8 && !build.fp16) {
-    sample::gLogInfo << "FP32 and INT8 precisions have been specified - more "
-                        "performance might be enabled by additionally "
-                        "specifying --fp16 or --best"
-                     << std::endl;
-  }
-
-  auto isInt8 = [](const IOFormat& format) {
-    return format.first == DataType::kINT8;
-  };
-  auto int8IO = std::count_if(build.inputFormats.begin(),
-                              build.inputFormats.end(), isInt8) +
-                std::count_if(build.outputFormats.begin(),
-                              build.outputFormats.end(), isInt8);
-
-  auto hasQDQLayers = [](INetworkDefinition& network) {
-    // Determine if our network has QDQ layers.
-    const auto nbLayers = network.getNbLayers();
-    for (int32_t i = 0; i < nbLayers; i++) {
-      const auto& layer = network.getLayer(i);
-      if (layer->getType() == LayerType::kQUANTIZE ||
-          layer->getType() == LayerType::kDEQUANTIZE) {
-        return true;
-      }
-    }
-    return false;
-  };
-
-  if (!hasQDQLayers(network) && (build.int8 || int8IO) &&
-      build.calibration.empty()) {
-    // Explicitly set int8 scales if no calibrator is provided and if I/O
-    // tensors use int8,
-    // because auto calibration does not support this case.
-    SMP_RETVAL_IF_FALSE(setTensorDynamicRange(network),
-                        "Error in set tensor dynamic range.", false, err);
-  } else if (build.int8) {
-    if (!hasQDQLayers(network) && int8IO) {
-      try {
-        // Set dynamic ranges of int8 inputs / outputs to match scales loaded
-        // from calibration cache
-        // TODO http://nvbugs/3262234 Change the network validation so that this
-        // workaround can be removed
-        setTensorScalesFromCalibration(network, build.inputFormats,
-                                       build.outputFormats, build.calibration);
-      } catch (std::exception&) {
-        sample::gLogError << "Int8IO was specified but impossible to read "
-                             "tensor scales from provided calibration cache "
-                             "file"
-                          << std::endl;
-        return false;
-      }
-    }
-    IOptimizationProfile* profileCalib{nullptr};
-    if (!build.shapesCalib.empty()) {
-      profileCalib = builder.createOptimizationProfile();
-      for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) {
-        auto* input = network.getInput(i);
-        Dims profileDims{};
-        auto shape = build.shapesCalib.find(input->getName());
-        ShapeRange shapesCalib{};
-        shapesCalib = shape->second;
-
-        profileDims =
-            toDims(shapesCalib[static_cast<size_t>(OptProfileSelector::kOPT)]);
-        // Here we check only kMIN as all profileDims are the same.
-        SMP_RETVAL_IF_FALSE(
-            profileCalib->setDimensions(input->getName(),
-                                        OptProfileSelector::kMIN, profileDims),
-            "Error in set dimensions to calibration profile OPT", false, err);
-        profileCalib->setDimensions(input->getName(), OptProfileSelector::kOPT,
-                                    profileDims);
-        profileCalib->setDimensions(input->getName(), OptProfileSelector::kMAX,
-                                    profileDims);
-      }
-      SMP_RETVAL_IF_FALSE(profileCalib->isValid(),
-                          "Calibration profile is invalid", false, err);
-      SMP_RETVAL_IF_FALSE(config.setCalibrationProfile(profileCalib),
-                          "Error in set calibration profile", false, err);
-    }
-
-    std::vector<int64_t> elemCount{};
-    for (int i = 0; i < network.getNbInputs(); i++) {
-      auto* input = network.getInput(i);
-      auto const dims = input->getDimensions();
-      auto const isDynamicInput = std::any_of(
-          dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; });
-
-      if (profileCalib) {
-        elemCount.push_back(volume(profileCalib->getDimensions(
-            input->getName(), OptProfileSelector::kOPT)));
-      } else if (profile && isDynamicInput) {
-        elemCount.push_back(volume(profile->getDimensions(
-            input->getName(), OptProfileSelector::kOPT)));
-      } else {
-        elemCount.push_back(volume(input->getDimensions()));
-      }
-    }
-
-    config.setInt8Calibrator(
-        new RndInt8Calibrator(1, elemCount, build.calibration, network, err));
-  }
-
-  if (build.directIO) {
-    config.setFlag(BuilderFlag::kDIRECT_IO);
-  }
-
-  switch (build.precisionConstraints) {
-  case PrecisionConstraints::kNONE:
-    // It's the default for TensorRT.
-    break;
-  case PrecisionConstraints::kOBEY:
-    config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS);
-    break;
-  case PrecisionConstraints::kPREFER:
-    config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
-    break;
-  }
-
-  if (!build.layerPrecisions.empty() &&
-      build.precisionConstraints != PrecisionConstraints::kNONE) {
-    setLayerPrecisions(network, build.layerPrecisions);
-  }
-
-  if (!build.layerOutputTypes.empty() &&
-      build.precisionConstraints != PrecisionConstraints::kNONE) {
-    setLayerOutputTypes(network, build.layerOutputTypes);
-  }
-
-  if (build.safe) {
-    config.setEngineCapability(sys.DLACore != -1
-                                   ? EngineCapability::kDLA_STANDALONE
-                                   : EngineCapability::kSAFETY);
-  }
-
-  if (build.restricted) {
-    config.setFlag(BuilderFlag::kSAFETY_SCOPE);
-  }
-
-  if (sys.DLACore != -1) {
-    if (sys.DLACore < builder.getNbDLACores()) {
-      config.setDefaultDeviceType(DeviceType::kDLA);
-      config.setDLACore(sys.DLACore);
-      config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
-
-      if (sys.fallback) {
-        config.setFlag(BuilderFlag::kGPU_FALLBACK);
-      } else {
-        // Reformatting runs on GPU, so avoid I/O reformatting.
-        config.setFlag(BuilderFlag::kDIRECT_IO);
-      }
-      if (!build.int8) {
-        config.setFlag(BuilderFlag::kFP16);
-      }
-    } else {
-      err << "Cannot create DLA engine, " << sys.DLACore << " not available"
-          << std::endl;
-      return false;
-    }
-  }
-
-  if (build.enabledTactics || build.disabledTactics) {
-    TacticSources tacticSources = config.getTacticSources();
-    tacticSources |= build.enabledTactics;
-    tacticSources &= ~build.disabledTactics;
-    config.setTacticSources(tacticSources);
-  }
-
-  return true;
-}
-
-//!
-//! \brief Create an engine for a network defintion
-//!
-//! \return Pointer to the engine created or nullptr if the creation failed
-//!
-bool networkToEngine(const BuildOptions& build, const SystemOptions& sys,
-                     IBuilder& builder, BuildEnvironment& env,
-                     std::ostream& err) {
-  TrtUniquePtr<IBuilderConfig> config{builder.createBuilderConfig()};
-  std::vector<std::vector<char>> sparseWeights;
-  SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", false, err);
-  SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, *env.network,
-                                            *config, err, sparseWeights),
-                      "Network And Config setup failed", false, err);
-
-  std::unique_ptr<ITimingCache> timingCache{nullptr};
-  // Try to load cache from file. Create a fresh cache if the file doesn't exist
-  if (build.timingCacheMode == TimingCacheMode::kGLOBAL) {
-    std::vector<char> loadedCache = loadTimingCacheFile(build.timingCacheFile);
-    timingCache.reset(config->createTimingCache(
-        static_cast<const void*>(loadedCache.data()), loadedCache.size()));
-    SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed",
-                        false, err);
-    config->setTimingCache(*timingCache, false);
-  }
-
-  // CUDA stream used for profiling by the builder.
-  auto profileStream = samplesCommon::makeCudaStream();
-  SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed",
-                      false, err);
-  config->setProfileStream(*profileStream);
-
-  TrtUniquePtr<IHostMemory> serializedEngine{
-      builder.buildSerializedNetwork(*env.network, *config)};
-  SMP_RETVAL_IF_FALSE(serializedEngine != nullptr,
-                      "Engine could not be created from network", false, err);
-
-  env.engineBlob.resize(serializedEngine->size());
-  std::memcpy(env.engineBlob.data(), serializedEngine->data(),
-              serializedEngine->size());
-
-  if (build.safe) {
-    ASSERT(sample::hasSafeRuntime());
-    std::unique_ptr<safe::IRuntime> safeRuntime{
-        sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
-    SMP_RETVAL_IF_FALSE(safeRuntime != nullptr, "SafeRuntime creation failed",
-                        false, err);
-    safeRuntime->setErrorRecorder(&gRecorder);
-    env.safeEngine.reset(safeRuntime->deserializeCudaEngine(
-        serializedEngine->data(), serializedEngine->size()));
-    if (build.consistency) {
-      checkSafeEngine(serializedEngine->data(), serializedEngine->size());
-    }
-    SMP_RETVAL_IF_FALSE(env.safeEngine != nullptr,
-                        "SafeEngine deserialization failed", false, err);
-  } else {
-    TrtUniquePtr<IRuntime> runtime{
-        createInferRuntime(sample::gLogger.getTRTLogger())};
-    SMP_RETVAL_IF_FALSE(runtime != nullptr, "Runtime creation failed", false,
-                        err);
-    runtime->setErrorRecorder(&gRecorder);
-    env.engine.reset(runtime->deserializeCudaEngine(serializedEngine->data(),
-                                                    serializedEngine->size()));
-    SMP_RETVAL_IF_FALSE(env.engine != nullptr, "Engine deserialization failed",
-                        false, err);
-    if (build.timingCacheMode == TimingCacheMode::kGLOBAL) {
-      auto const& timingCache = config->getTimingCache();
-      std::unique_ptr<IHostMemory> timingCacheHostData{
-          timingCache->serialize()};
-      SMP_RETVAL_IF_FALSE(timingCacheHostData != nullptr,
-                          "Timing Cache serialization failed", false, err);
-      saveTimingCacheFile(build.timingCacheFile, timingCacheHostData.get());
-    }
-    if (config->getInt8Calibrator()) {
-      delete config->getInt8Calibrator();
-    }
-  }
-  return true;
-}
-
-//!
-//! \brief Parse a given model, create a network and an engine.
-//!
-bool modelToBuildEnv(const ModelOptions& model, const BuildOptions& build,
-                     const SystemOptions& sys, BuildEnvironment& env,
-                     std::ostream& err) {
-  TrtUniquePtr<IBuilder> builder{
-      createInferBuilder(sample::gLogger.getTRTLogger())};
-  SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", false,
-                      err);
-  builder->setErrorRecorder(&gRecorder);
-  auto networkFlags =
-      (build.maxBatch)
-          ? 0U
-          : 1U << static_cast<uint32_t>(
-                nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
-
-  env.network.reset(builder->createNetworkV2(networkFlags));
-  SMP_RETVAL_IF_FALSE(env.network != nullptr, "Network creation failed", false,
-                      err);
-  env.parser = modelToNetwork(model, *env.network, err);
-  SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false,
-                      err);
-  SMP_RETVAL_IF_FALSE(networkToEngine(build, sys, *builder, env, err),
-                      "Building engine failed", false, err);
-  return true;
-}
-
-namespace {
-std::pair<std::vector<std::string>, std::vector<WeightsRole>>
-getLayerWeightsRolePair(IRefitter& refitter) {
-  // Get number of refittable items.
-  auto const nbAll = refitter.getAll(0, nullptr, nullptr);
-  std::vector<char const*> layerNames(nbAll);
-  // Allocate buffers for the items and get them.
-  std::vector<nvinfer1::WeightsRole> weightsRoles(nbAll);
-  refitter.getAll(nbAll, layerNames.data(), weightsRoles.data());
-  std::vector<std::string> layerNameStrs(nbAll);
-  std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(),
-                 [](char const* name) {
-                   if (name == nullptr) {
-                     return std::string{};
-                   }
-                   return std::string{name};
-                 });
-  return {layerNameStrs, weightsRoles};
-}
-
-std::pair<std::vector<std::string>, std::vector<WeightsRole>>
-getMissingLayerWeightsRolePair(IRefitter& refitter) {
-  // Get number of refittable items.
-  auto const nbMissing = refitter.getMissing(0, nullptr, nullptr);
-  std::vector<const char*> layerNames(nbMissing);
-  // Allocate buffers for the items and get them.
-  std::vector<nvinfer1::WeightsRole> weightsRoles(nbMissing);
-  refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data());
-  std::vector<std::string> layerNameStrs(nbMissing);
-  std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(),
-                 [](char const* name) {
-                   if (name == nullptr) {
-                     return std::string{};
-                   }
-                   return std::string{name};
-                 });
-  return {layerNameStrs, weightsRoles};
-}
-
-bool loadEngineToEnv(const std::string& engine, int DLACore, bool safe,
-                     bool enableConsistency, BuildEnvironment& env,
-                     std::ostream& err) {
-  std::ifstream engineFile(engine, std::ios::binary);
-  SMP_RETVAL_IF_FALSE(engineFile.good(), "", false,
-                      err << "Error opening engine file: " << engine);
-  engineFile.seekg(0, std::ifstream::end);
-  int64_t fsize = engineFile.tellg();
-  engineFile.seekg(0, std::ifstream::beg);
-
-  env.engineBlob.resize(fsize);
-  engineFile.read(reinterpret_cast<char*>(env.engineBlob.data()), fsize);
-  SMP_RETVAL_IF_FALSE(engineFile.good(), "", false,
-                      err << "Error loading engine file: " << engine);
-
-  if (safe) {
-    ASSERT(sample::hasSafeRuntime());
-    std::unique_ptr<safe::IRuntime> safeRuntime{
-        sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
-    safeRuntime->setErrorRecorder(&gRecorder);
-    env.safeEngine.reset(
-        safeRuntime->deserializeCudaEngine(env.engineBlob.data(), fsize));
-    bool result = env.safeEngine != nullptr;
-    if (result && enableConsistency) {
-      checkSafeEngine(env.engineBlob.data(), fsize);
-    }
-    return result;
-  }
-
-  TrtUniquePtr<IRuntime> runtime{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
-  if (DLACore != -1) {
-    runtime->setDLACore(DLACore);
-  }
-  runtime->setErrorRecorder(&gRecorder);
-  env.engine.reset(
-      runtime->deserializeCudaEngine(env.engineBlob.data(), fsize, nullptr));
-  return env.engine != nullptr;
-}
-} // namespace
-
-void dumpRefittable(nvinfer1::ICudaEngine& engine) {
-  TrtUniquePtr<IRefitter> refitter{
-      createInferRefitter(engine, sample::gLogger.getTRTLogger())};
-  if (refitter == nullptr) {
-    sample::gLogError << "Failed to create a refitter." << std::endl;
-    return;
-  }
-
-  auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter);
-  auto const& layerNames = layerWeightsRolePair.first;
-  auto const& weightsRoles = layerWeightsRolePair.second;
-  auto const nbAll = layerWeightsRolePair.first.size();
-  for (size_t i = 0; i < nbAll; ++i) {
-    sample::gLogInfo << layerNames[i] << " " << weightsRoles[i] << std::endl;
-  }
-}
-
-ICudaEngine* loadEngine(const std::string& engine, int DLACore,
-                        std::ostream& err) {
-  BuildEnvironment env;
-  return loadEngineToEnv(engine, DLACore, false, false, env, err)
-             ? env.engine.release()
-             : nullptr;
-}
-
-bool saveEngine(const ICudaEngine& engine, const std::string& fileName,
-                std::ostream& err) {
-  std::ofstream engineFile(fileName, std::ios::binary);
-  if (!engineFile) {
-    err << "Cannot open engine file: " << fileName << std::endl;
-    return false;
-  }
-
-  TrtUniquePtr<IHostMemory> serializedEngine{engine.serialize()};
-  if (serializedEngine == nullptr) {
-    err << "Engine serialization failed" << std::endl;
-    return false;
-  }
-
-  engineFile.write(static_cast<char*>(serializedEngine->data()),
-                   serializedEngine->size());
-  return !engineFile.fail();
-}
-
-bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
-                       const SystemOptions& sys, BuildEnvironment& env,
-                       std::ostream& err) {
-  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
-  TrtUniquePtr<INetworkDefinition> network;
-  Parser parser;
-
-  bool createEngineSuccess{false};
-
-  if (build.load) {
-    createEngineSuccess = loadEngineToEnv(build.engine, sys.DLACore, build.safe,
-                                          build.consistency, env, err);
-  } else {
-    createEngineSuccess = modelToBuildEnv(model, build, sys, env, err);
-  }
-
-  SMP_RETVAL_IF_FALSE(createEngineSuccess,
-                      "Failed to create engine from model.", false, err);
-
-  if (build.save) {
-    std::ofstream engineFile(build.engine, std::ios::binary);
-    engineFile.write(reinterpret_cast<char*>(env.engineBlob.data()),
-                     env.engineBlob.size());
-    SMP_RETVAL_IF_FALSE(!engineFile.fail(), "Saving engine to file failed.",
-                        false, err);
-  }
-  return true;
-}
-
-IHostMemory* networkToSerialized(const BuildOptions& build,
-                                 const SystemOptions& sys, IBuilder& builder,
-                                 INetworkDefinition& network,
-                                 std::ostream& err) {
-  TrtUniquePtr<IBuilderConfig> config{builder.createBuilderConfig()};
-  std::vector<std::vector<char>> sparseWeights;
-  SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", nullptr,
-                      err);
-  SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, network,
-                                            *config, err, sparseWeights),
-                      "Network And Config setup failed", nullptr, err);
-  return builder.buildSerializedNetwork(network, *config);
-}
-
-IHostMemory* modelToSerialized(const ModelOptions& model,
-                               const BuildOptions& build,
-                               const SystemOptions& sys, std::ostream& err) {
-  TrtUniquePtr<IBuilder> builder{
-      createInferBuilder(sample::gLogger.getTRTLogger())};
-  SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", nullptr,
-                      err);
-  builder->setErrorRecorder(&gRecorder);
-
-  auto networkFlags =
-      (build.maxBatch)
-          ? 0U
-          : 1U << static_cast<uint32_t>(
-                nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
-
-  TrtUniquePtr<INetworkDefinition> network{
-      builder->createNetworkV2(networkFlags)};
-  SMP_RETVAL_IF_FALSE(network != nullptr, "Network creation failed", nullptr,
-                      err);
-
-  Parser parser = modelToNetwork(model, *network, err);
-  SMP_RETVAL_IF_FALSE(parser.operator bool(), "Parsing model failed", nullptr,
-                      err);
-
-  return networkToSerialized(build, sys, *builder, *network, err);
-}
-
-bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
-                      const SystemOptions& sys, std::ostream& err) {
-  TrtUniquePtr<IHostMemory> serialized{
-      modelToSerialized(model, build, sys, err)};
-  SMP_RETVAL_IF_FALSE(serialized != nullptr, "Network serialization failed",
-                      false, err);
-
-  std::ofstream engineFile(build.engine, std::ios::binary);
-  SMP_RETVAL_IF_FALSE(!!engineFile,
-                      "Cannot open a file to save a serialize network", false,
-                      err);
-  engineFile.write(static_cast<char*>(serialized->data()), serialized->size());
-  return !engineFile.fail();
-}
-
-// There is not a getWeightsName API, so we need to use WeightsRole.
-std::vector<std::pair<WeightsRole, Weights>>
-getAllRefitWeightsForLayer(const ILayer& l) {
-  switch (l.getType()) {
-  case LayerType::kCONSTANT: {
-    const auto& layer = static_cast<const nvinfer1::IConstantLayer&>(l);
-    return {std::make_pair(WeightsRole::kCONSTANT, layer.getWeights())};
-  }
-  case LayerType::kCONVOLUTION: {
-    const auto& layer = static_cast<const nvinfer1::IConvolutionLayer&>(l);
-    return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()),
-            std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())};
-  }
-  case LayerType::kDECONVOLUTION: {
-    const auto& layer = static_cast<const nvinfer1::IDeconvolutionLayer&>(l);
-    return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()),
-            std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())};
-  }
-  case LayerType::kFULLY_CONNECTED: {
-    const auto& layer = static_cast<const nvinfer1::IFullyConnectedLayer&>(l);
-    return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()),
-            std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())};
-  }
-  case LayerType::kSCALE: {
-    const auto& layer = static_cast<const nvinfer1::IScaleLayer&>(l);
-    return {std::make_pair(WeightsRole::kSCALE, layer.getScale()),
-            std::make_pair(WeightsRole::kSHIFT, layer.getShift())};
-  }
-  case LayerType::kRNN_V2:
-  case LayerType::kACTIVATION:
-  case LayerType::kPOOLING:
-  case LayerType::kLRN:
-  case LayerType::kSOFTMAX:
-  case LayerType::kSHUFFLE:
-  case LayerType::kCONCATENATION:
-  case LayerType::kELEMENTWISE:
-  case LayerType::kPLUGIN:
-  case LayerType::kUNARY:
-  case LayerType::kPADDING:
-  case LayerType::kREDUCE:
-  case LayerType::kTOPK:
-  case LayerType::kGATHER:
-  case LayerType::kMATRIX_MULTIPLY:
-  case LayerType::kRAGGED_SOFTMAX:
-  case LayerType::kIDENTITY:
-  case LayerType::kPLUGIN_V2:
-  case LayerType::kSLICE:
-  case LayerType::kFILL:
-  case LayerType::kSHAPE:
-  case LayerType::kPARAMETRIC_RELU:
-  case LayerType::kRESIZE:
-  case LayerType::kTRIP_LIMIT:
-  case LayerType::kRECURRENCE:
-  case LayerType::kITERATOR:
-  case LayerType::kLOOP_OUTPUT:
-  case LayerType::kSELECT:
-  case LayerType::kQUANTIZE:
-  case LayerType::kDEQUANTIZE:
-  case LayerType::kCONDITION:
-  case LayerType::kCONDITIONAL_INPUT:
-  case LayerType::kCONDITIONAL_OUTPUT:
-  case LayerType::kSCATTER:
-  case LayerType::kEINSUM:
-  case LayerType::kASSERTION:
-    return {};
-  }
-  return {};
-}
-
-bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine,
-               bool multiThreading) {
-  using time_point = std::chrono::time_point<std::chrono::steady_clock>;
-  using durationMs = std::chrono::duration<float, std::milli>;
-
-  auto const nbLayers = network.getNbLayers();
-  TrtUniquePtr<IRefitter> refitter{
-      createInferRefitter(engine, sample::gLogger.getTRTLogger())};
-  // Set max threads that can be used by refitter.
-  if (multiThreading && !refitter->setMaxThreads(10)) {
-    sample::gLogError << "Failed to set max threads to refitter." << std::endl;
-    return false;
-  }
-  auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter);
-  // We use std::string instead of const char* since we can have copies of layer
-  // names.
-  std::set<std::pair<std::string, WeightsRole>> layerRoleSet;
-
-  auto const& layerNames = layerWeightsRolePair.first;
-  auto const& weightsRoles = layerWeightsRolePair.second;
-
-  std::transform(layerNames.begin(), layerNames.end(), weightsRoles.begin(),
-                 std::inserter(layerRoleSet, layerRoleSet.begin()),
-                 [](std::string const& layerName, WeightsRole const role) {
-                   return std::make_pair(layerName, role);
-                 });
-
-  auto const isRefittable = [&layerRoleSet](char const* layerName,
-                                            WeightsRole const role) {
-    return layerRoleSet.find(std::make_pair(layerName, role)) !=
-           layerRoleSet.end();
-  };
-
-  auto const setWeights = [&] {
-    for (int32_t i = 0; i < nbLayers; i++) {
-      auto const layer = network.getLayer(i);
-      auto const roleWeightsVec = getAllRefitWeightsForLayer(*layer);
-      for (auto const& roleWeights : roleWeightsVec) {
-        if (isRefittable(layer->getName(), roleWeights.first)) {
-          bool const success = refitter->setWeights(
-              layer->getName(), roleWeights.first, roleWeights.second);
-          if (!success) {
-            return false;
-          }
-        }
-      }
-    }
-    return true;
-  };
-
-  auto const reportMissingWeights = [&] {
-    auto const& missingPair = getMissingLayerWeightsRolePair(*refitter);
-    auto const& layerNames = missingPair.first;
-    auto const& weightsRoles = missingPair.second;
-    for (size_t i = 0; i < layerNames.size(); ++i) {
-      sample::gLogError << "Missing (" << layerNames[i] << ", "
-                        << weightsRoles[i] << ") for refitting." << std::endl;
-    }
-    return layerNames.empty();
-  };
-
-  // Warm up and report missing weights
-  bool const success =
-      setWeights() && reportMissingWeights() && refitter->refitCudaEngine();
-  if (!success) {
-    return false;
-  }
-
-  constexpr int32_t loop = 10;
-  time_point const refitStartTime{std::chrono::steady_clock::now()};
-  {
-    for (int32_t l = 0; l < loop; l++) {
-      bool const success = setWeights() && refitter->refitCudaEngine();
-      if (!success) {
-        return false;
-      }
-    }
-  }
-  time_point const refitEndTime{std::chrono::steady_clock::now()};
-
-  sample::gLogInfo << "Engine refitted"
-                   << " in "
-                   << durationMs(refitEndTime - refitStartTime).count() / loop
-                   << " ms." << std::endl;
-  return true;
-}
-
-namespace {
-void* initSafeRuntime() {
-  void* handle{nullptr};
-#if !defined(_WIN32)
-  std::string const dllName{samplesCommon::isDebug()
-                                ? "libnvinfer_safe_debug.so.8"
-                                : "libnvinfer_safe.so.8"};
-#if SANITIZER_BUILD
-  handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE);
-#else
-  handle = dlopen(dllName.c_str(), RTLD_LAZY);
-#endif
-#endif
-  return handle;
-}
-
-void* initConsistencyCheckerLibrary() {
-  void* handle{nullptr};
-#if !defined(_WIN32)
-  std::string const dllName{samplesCommon::isDebug()
-                                ? "libnvinfer_checker_debug.so.8"
-                                : "libnvinfer_checker.so.8"};
-#if SANITIZER_BUILD
-  handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE);
-#else
-  handle = dlopen(dllName.c_str(), RTLD_LAZY);
-#endif
-#endif
-  return handle;
-}
-
-#if !defined(_WIN32)
-struct DllDeleter {
-  void operator()(void* handle) {
-    if (handle != nullptr) {
-      dlclose(handle);
-    }
-  }
-};
-const std::unique_ptr<void, DllDeleter> safeRuntimeLibrary{initSafeRuntime()};
-const std::unique_ptr<void, DllDeleter> consistencyCheckerLibrary{
-    initConsistencyCheckerLibrary()};
-#endif
-} // namespace
-
-bool hasSafeRuntime() {
-  bool ret{false};
-#if !defined(_WIN32)
-  ret = (safeRuntimeLibrary != nullptr);
-#endif
-  return ret;
-}
-
-nvinfer1::safe::IRuntime*
-createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept {
-  nvinfer1::safe::IRuntime* runtime{nullptr};
-#if !defined(_WIN32)
-  constexpr char symbolName[] =
-      "_ZN8nvinfer14safe18createInferRuntimeERNS_7ILoggerE";
-  typedef nvinfer1::safe::IRuntime* (*CreateInferRuntimeFn)(nvinfer1::ILogger &
-                                                            logger);
-  if (hasSafeRuntime()) {
-    auto createFn = reinterpret_cast<CreateInferRuntimeFn>(
-        dlsym(safeRuntimeLibrary.get(), symbolName));
-    if (createFn != nullptr) {
-      runtime = createFn(logger);
-    }
-  }
-#endif
-  return runtime;
-}
-
-bool hasConsistencyChecker() {
-  bool ret{false};
-#if !defined(_WIN32)
-  ret = (consistencyCheckerLibrary != nullptr);
-#endif
-  return ret;
-}
-
-nvinfer1::consistency::IConsistencyChecker*
-createConsistencyChecker(nvinfer1::ILogger& logger,
-                         void const* serializedEngine,
-                         int32_t const engineSize) noexcept {
-  nvinfer1::consistency::IConsistencyChecker* checker{nullptr};
-
-  if (serializedEngine == nullptr || engineSize == 0) {
-    return checker;
-  }
-
-#if !defined(_WIN32)
-  constexpr char symbolName[] = "createConsistencyChecker_INTERNAL";
-  typedef nvinfer1::consistency::IConsistencyChecker* (*CreateCheckerFn)(
-      nvinfer1::ILogger * logger, void const* data, size_t size,
-      uint32_t version);
-  if (hasSafeRuntime()) {
-    auto createFn = reinterpret_cast<CreateCheckerFn>(
-        dlsym(consistencyCheckerLibrary.get(), symbolName));
-    if (createFn != nullptr) {
-      checker =
-          createFn(&logger, serializedEngine, engineSize, NV_TENSORRT_VERSION);
-    }
-  }
-#endif
-  return checker;
-}
-
-bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize) {
-  if (!hasConsistencyChecker()) {
-    sample::gLogError << "Cannot perform consistency check because the checker "
-                         "is not loaded.."
-                      << std::endl;
-    return false;
-  }
-  auto checker = std::unique_ptr<nvinfer1::consistency::IConsistencyChecker>(
-      createConsistencyChecker(sample::gLogger.getTRTLogger(), serializedEngine,
-                               engineSize));
-  if (checker.get() == nullptr) {
-    sample::gLogError << "Failed to create consistency checker." << std::endl;
-    return false;
-  }
-  sample::gLogInfo << "Start consistency checking." << std::endl;
-  if (!checker->validate()) {
-    sample::gLogError << "Consistency validation failed." << std::endl;
-    return false;
-  }
-  sample::gLogInfo << "Consistency validation passed." << std::endl;
-  return true;
-}
-} // namespace sample
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h
deleted file mode 100644
index 1b7b7a000..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TRT_SAMPLE_ENGINES_H
-#define TRT_SAMPLE_ENGINES_H
-
-#include <iostream>
-#include <vector>
-
-//#include "NvCaffeParser.h"
-#include "NvInfer.h"
-#include "NvInferConsistency.h"
-#include "NvInferSafeRuntime.h"
-#include "NvOnnxParser.h"
-#include "sampleOptions.h"
-#include "sampleUtils.h"
-
-namespace sample {
-
-struct Parser {
-//  TrtUniquePtr<nvcaffeparser1::ICaffeParser> caffeParser;
-  TrtUniquePtr<nvonnxparser::IParser> onnxParser;
-
-  operator bool() const { return false || onnxParser; }
-};
-
-struct BuildEnvironment {
-  TrtUniquePtr<INetworkDefinition> network;
-  //! Parser that creates the network. Must be declared *after* network, so that
-  //! when
-  //! ~BuildEnvironment() executes, the parser is destroyed before the network
-  //! is destroyed.
-  Parser parser;
-  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
-  std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
-  std::vector<uint8_t> engineBlob;
-};
-
-//!
-//! \brief Generate a network definition for a given model
-//!
-//! \return Parser The parser used to initialize the network and that holds the
-//! weights for the network, or an invalid
-//! parser (the returned parser converts to false if tested)
-//!
-//! Constant input dimensions in the model must not be changed in the
-//! corresponding
-//! network definition, because its correctness may rely on the constants.
-//!
-//! \see Parser::operator bool()
-//!
-Parser modelToNetwork(const ModelOptions& model,
-                      nvinfer1::INetworkDefinition& network, std::ostream& err);
-
-//!
-//! \brief Set up network and config
-//!
-//! \return boolean Return true if network and config were successfully set
-//!
-bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
-                           IBuilder& builder, INetworkDefinition& network,
-                           IBuilderConfig& config, std::ostream& err,
-                           std::vector<std::vector<char>>& sparseWeights);
-
-//!
-//! \brief Log refittable layers and weights of a refittable engine
-//!
-void dumpRefittable(nvinfer1::ICudaEngine& engine);
-
-//!
-//! \brief Load a serialized engine
-//!
-//! \return Pointer to the engine loaded or nullptr if the operation failed
-//!
-nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore,
-                                  std::ostream& err);
-
-//!
-//! \brief Save an engine into a file
-//!
-//! \return boolean Return true if the engine was successfully saved
-//!
-bool saveEngine(const nvinfer1::ICudaEngine& engine,
-                const std::string& fileName, std::ostream& err);
-
-//!
-//! \brief Create an engine from model or serialized file, and optionally save
-//! engine
-//!
-//! \return Pointer to the engine created or nullptr if the creation failed
-//!
-bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
-                       const SystemOptions& sys, BuildEnvironment& env,
-                       std::ostream& err);
-
-//!
-//! \brief Create an engine from model or serialized file, and optionally save
-//! engine
-//!
-//! \return Pointer to the engine created or nullptr if the creation failed
-//!
-inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
-                                                     const BuildOptions& build,
-                                                     const SystemOptions& sys,
-                                                     std::ostream& err) {
-  BuildEnvironment env;
-  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
-  if (getEngineBuildEnv(model, build, sys, env, err)) {
-    engine.swap(env.engine);
-  }
-  return engine;
-}
-
-//!
-//! \brief Create a serialized network
-//!
-//! \return Pointer to a host memory for a serialized network
-//!
-IHostMemory* networkToSerialized(const BuildOptions& build,
-                                 const SystemOptions& sys, IBuilder& builder,
-                                 INetworkDefinition& network,
-                                 std::ostream& err);
-
-//!
-//! \brief Tranfer model to a serialized network
-//!
-//! \return Pointer to a host memory for a serialized network
-//!
-IHostMemory* modelToSerialized(const ModelOptions& model,
-                               const BuildOptions& build,
-                               const SystemOptions& sys, std::ostream& err);
-
-//!
-//! \brief Serialize network and save it into a file
-//!
-//! \return boolean Return true if the network was successfully serialized and
-//! saved
-//!
-bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
-                      const SystemOptions& sys, std::ostream& err);
-
-bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine,
-               bool multiThreading);
-
-//!
-//! \brief Set tensor scales from a calibration table
-//!
-void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
-                                    const std::vector<IOFormat>& inputFormats,
-                                    const std::vector<IOFormat>& outputFormats,
-                                    const std::string& calibrationFile);
-
-//!
-//! \brief Check if safe runtime is loaded.
-//!
-bool hasSafeRuntime();
-
-//!
-//! \brief Create a safe runtime object if the dynamic library is loaded.
-//!
-nvinfer1::safe::IRuntime*
-createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
-
-//!
-//! \brief Check if consistency checker is loaded.
-//!
-bool hasConsistencyChecker();
-
-//!
-//! \brief Create a consistency checker object if the dynamic library is loaded.
-//!
-nvinfer1::consistency::IConsistencyChecker*
-createConsistencyChecker(nvinfer1::ILogger& logger,
-                         IHostMemory const* engine) noexcept;
-
-//!
-//! \brief Run consistency check on serialized engine.
-//!
-bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
-} // namespace sample
-
-#endif // TRT_SAMPLE_ENGINES_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp
deleted file mode 100644
index fd7e9f82f..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp
+++ /dev/null
@@ -1,943 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <array>
-#include <chrono>
-#include <cuda_profiler_api.h>
-#include <functional>
-#include <limits>
-#include <memory>
-#include <mutex>
-#include <numeric>
-#include <thread>
-#include <utility>
-#include <vector>
-
-#if defined(__QNX__)
-#include <sys/neutrino.h>
-#include <sys/syspage.h>
-#endif
-
-#include "NvInfer.h"
-
-#include "ErrorRecorder.h"
-#include "logger.h"
-#include "sampleDevice.h"
-#include "sampleEngines.h"
-#include "sampleInference.h"
-#include "sampleOptions.h"
-#include "sampleReporting.h"
-#include "sampleUtils.h"
-
-namespace sample {
-
-template <class MapType, class EngineType>
-bool validateTensorNames(const MapType& map, const EngineType* engine,
-                         const int32_t endBindingIndex) {
-  // Check if the provided input tensor names match the input tensors of the
-  // engine.
-  // Throw an error if the provided input tensor names cannot be found because
-  // it implies a potential typo.
-  for (const auto& item : map) {
-    bool tensorNameFound{false};
-    for (int32_t b = 0; b < endBindingIndex; ++b) {
-      if (engine->bindingIsInput(b) &&
-          engine->getBindingName(b) == item.first) {
-        tensorNameFound = true;
-        break;
-      }
-    }
-    if (!tensorNameFound) {
-      sample::gLogError
-          << "Cannot find input tensor with name \"" << item.first
-          << "\" in the engine bindings! "
-          << "Please make sure the input tensor names are correct."
-          << std::endl;
-      return false;
-    }
-  }
-  return true;
-}
-
-template <class EngineType, class ContextType> class FillBindingClosure {
- private:
-  using InputsMap = std::unordered_map<std::string, std::string>;
-  using BindingsVector = std::vector<std::unique_ptr<Bindings>>;
-
-  EngineType const* engine;
-  ContextType const* context;
-  InputsMap const& inputs;
-  BindingsVector& bindings;
-  int32_t batch;
-  int32_t endBindingIndex;
-
-  void fillOneBinding(int32_t bindingIndex, int64_t vol) {
-    auto const dims = getDims(bindingIndex);
-    auto const name = engine->getBindingName(bindingIndex);
-    auto const isInput = engine->bindingIsInput(bindingIndex);
-    auto const dataType = engine->getBindingDataType(bindingIndex);
-    auto const* bindingInOutStr = isInput ? "input" : "output";
-    for (auto& binding : bindings) {
-      const auto input = inputs.find(name);
-      if (isInput && input != inputs.end()) {
-        sample::gLogInfo << "Using values loaded from " << input->second
-                         << " for input " << name << std::endl;
-        binding->addBinding(bindingIndex, name, isInput, vol, dataType,
-                            input->second);
-      } else {
-        sample::gLogInfo << "Using random values for " << bindingInOutStr << " "
-                         << name << std::endl;
-        binding->addBinding(bindingIndex, name, isInput, vol, dataType);
-      }
-      sample::gLogInfo << "Created " << bindingInOutStr << " binding for "
-                       << name << " with dimensions " << dims << std::endl;
-    }
-  }
-
-  bool fillAllBindings(int32_t batch, int32_t endBindingIndex) {
-    if (!validateTensorNames(inputs, engine, endBindingIndex)) {
-      sample::gLogError << "Invalid tensor names found in --loadInputs flag."
-                        << std::endl;
-      return false;
-    }
-
-    for (int32_t b = 0; b < endBindingIndex; b++) {
-      auto const dims = getDims(b);
-      auto const comps = engine->getBindingComponentsPerElement(b);
-      auto const strides = context->getStrides(b);
-      int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b);
-      auto const vol = volume(dims, strides, vectorDimIndex, comps, batch);
-      fillOneBinding(b, vol);
-    }
-    return true;
-  }
-
-  Dims getDims(int32_t bindingIndex);
-
- public:
-  FillBindingClosure(EngineType const* _engine, ContextType const* _context,
-                     InputsMap const& _inputs, BindingsVector& _bindings,
-                     int32_t _batch, int32_t _endBindingIndex)
-      : engine(_engine), context(_context), inputs(_inputs),
-        bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {}
-
-  bool operator()() { return fillAllBindings(batch, endBindingIndex); }
-};
-
-template <>
-Dims FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>::
-    getDims(int32_t bindingIndex) {
-  return context->getBindingDimensions(bindingIndex);
-}
-
-template <>
-Dims FillBindingClosure<
-    nvinfer1::safe::ICudaEngine,
-    nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) {
-  return engine->getBindingDimensions(bindingIndex);
-}
-
-bool setUpInference(InferenceEnvironment& iEnv,
-                    const InferenceOptions& inference) {
-  int32_t device{};
-  cudaCheck(cudaGetDevice(&device));
-
-  cudaDeviceProp properties;
-  cudaCheck(cudaGetDeviceProperties(&properties, device));
-  // Use managed memory on integrated devices when transfers are skipped
-  // and when it is explicitly requested on the commandline.
-  bool useManagedMemory{(inference.skipTransfers && properties.integrated) ||
-                        inference.useManaged};
-  using FillSafeBindings =
-      FillBindingClosure<nvinfer1::safe::ICudaEngine,
-                         nvinfer1::safe::IExecutionContext>;
-  if (iEnv.safe) {
-    ASSERT(sample::hasSafeRuntime());
-    auto* safeEngine = iEnv.safeEngine.get();
-    for (int32_t s = 0; s < inference.streams; ++s) {
-      iEnv.safeContext.emplace_back(safeEngine->createExecutionContext());
-      iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
-    }
-    const int32_t nBindings = safeEngine->getNbBindings();
-    auto const* safeContext = iEnv.safeContext.front().get();
-    // batch is set to 1 because safety only support explicit batch.
-    return FillSafeBindings(iEnv.safeEngine.get(), safeContext,
-                            inference.inputs, iEnv.bindings, 1, nBindings)();
-  }
-
-  using FillStdBindings =
-      FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>;
-
-  for (int32_t s = 0; s < inference.streams; ++s) {
-    auto ec = iEnv.engine->createExecutionContext();
-    if (ec == nullptr) {
-      sample::gLogError << "Unable to create execution context for stream " << s
-                        << "." << std::endl;
-      return false;
-    }
-    iEnv.context.emplace_back(ec);
-    iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
-  }
-  if (iEnv.profiler) {
-    iEnv.context.front()->setProfiler(iEnv.profiler.get());
-    // Always run reportToProfiler() after enqueue launch
-    iEnv.context.front()->setEnqueueEmitsProfile(false);
-  }
-
-  const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles();
-  const int32_t nBindings = iEnv.engine->getNbBindings();
-  const int32_t bindingsInProfile =
-      nOptProfiles > 0 ? nBindings / nOptProfiles : 0;
-  const int32_t endBindingIndex =
-      bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings();
-
-  if (nOptProfiles > 1) {
-    sample::gLogWarning << "Multiple profiles are currently not supported. "
-                           "Running with one profile."
-                        << std::endl;
-  }
-
-  // Make sure that the tensor names provided in command-line args actually
-  // exist in any of the engine bindings
-  // to avoid silent typos.
-  if (!validateTensorNames(inference.shapes, iEnv.engine.get(),
-                           endBindingIndex)) {
-    sample::gLogError << "Invalid tensor names found in --shapes flag."
-                      << std::endl;
-    return false;
-  }
-
-  // Set all input dimensions before all bindings can be allocated
-  for (int32_t b = 0; b < endBindingIndex; ++b) {
-    if (iEnv.engine->bindingIsInput(b)) {
-      auto dims = iEnv.context.front()->getBindingDimensions(b);
-      const bool isScalar = dims.nbDims == 0;
-      const bool isDynamicInput =
-          std::any_of(dims.d, dims.d + dims.nbDims,
-                      [](int32_t dim) { return dim == -1; }) ||
-          iEnv.engine->isShapeBinding(b);
-      if (isDynamicInput) {
-        auto shape = inference.shapes.find(iEnv.engine->getBindingName(b));
-
-        std::vector<int32_t> staticDims;
-        if (shape == inference.shapes.end()) {
-          // If no shape is provided, set dynamic dimensions to 1.
-          constexpr int32_t DEFAULT_DIMENSION = 1;
-          if (iEnv.engine->isShapeBinding(b)) {
-            if (isScalar) {
-              staticDims.push_back(1);
-            } else {
-              staticDims.resize(dims.d[0]);
-              std::fill(staticDims.begin(), staticDims.end(),
-                        DEFAULT_DIMENSION);
-            }
-          } else {
-            staticDims.resize(dims.nbDims);
-            std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
-                           [&](int32_t dimension) {
-                             return dimension >= 0 ? dimension
-                                                   : DEFAULT_DIMENSION;
-                           });
-          }
-          sample::gLogWarning << "Dynamic dimensions required for input: "
-                              << iEnv.engine->getBindingName(b)
-                              << ", but no shapes were provided. Automatically "
-                                 "overriding shape to: "
-                              << staticDims << std::endl;
-        } else if (inference.inputs.count(shape->first) &&
-                   iEnv.engine->isShapeBinding(b)) {
-          if (isScalar || dims.nbDims == 1) {
-            // Load shape tensor from file.
-            size_t const size = isScalar ? 1 : dims.d[0];
-            staticDims.resize(size);
-            auto const& filename = inference.inputs.at(shape->first);
-            auto dst = reinterpret_cast<char*>(staticDims.data());
-            loadFromFile(filename, dst,
-                         size * sizeof(decltype(staticDims)::value_type));
-          } else {
-            sample::gLogWarning << "Cannot load shape tensor " << shape->first
-                                << " from file, "
-                                << "ND-Shape isn't supported yet" << std::endl;
-            // Fallback
-            staticDims = shape->second;
-          }
-        } else {
-          staticDims = shape->second;
-        }
-
-        for (auto& c : iEnv.context) {
-          if (iEnv.engine->isShapeBinding(b)) {
-            if (!c->setInputShapeBinding(b, staticDims.data())) {
-              return false;
-            }
-          } else {
-            if (!c->setBindingDimensions(b, toDims(staticDims))) {
-              return false;
-            }
-          }
-        }
-      }
-    }
-  }
-
-  auto* engine = iEnv.engine.get();
-  auto const* context = iEnv.context.front().get();
-  int32_t const batch =
-      engine->hasImplicitBatchDimension() ? inference.batch : 1;
-  return FillStdBindings(engine, context, inference.inputs, iEnv.bindings,
-                         batch, endBindingIndex)();
-}
-
-namespace {
-
-#if defined(__QNX__)
-using TimePoint = double;
-#else
-using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
-#endif
-
-TimePoint getCurrentTime() {
-#if defined(__QNX__)
-  uint64_t const currentCycles = ClockCycles();
-  uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
-  // Return current timestamp in ms.
-  return static_cast<TimePoint>(currentCycles) * 1000. / cyclesPerSecond;
-#else
-  return std::chrono::high_resolution_clock::now();
-#endif
-}
-
-//!
-//! \struct SyncStruct
-//! \brief Threads synchronization structure
-//!
-struct SyncStruct {
-  std::mutex mutex;
-  TrtCudaStream mainStream;
-  TrtCudaEvent gpuStart{cudaEventBlockingSync};
-  TimePoint cpuStart{};
-  float sleep{};
-};
-
-struct Enqueue {
-  explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
-      : mContext(context), mBuffers(buffers) {}
-
-  nvinfer1::IExecutionContext& mContext;
-  void** mBuffers{};
-};
-
-//!
-//! \class EnqueueImplicit
-//! \brief Functor to enqueue inference with implict batch
-//!
-class EnqueueImplicit : private Enqueue {
- public:
-  explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
-                           int32_t batch)
-      : Enqueue(context, buffers), mBatch(batch) {}
-
-  bool operator()(TrtCudaStream& stream) const {
-    if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
-      // Collecting layer timing info from current profile index of execution
-      // context
-      if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
-          !mContext.reportToProfiler()) {
-        gLogWarning
-            << "Failed to collect layer timing info from previous enqueue()"
-            << std::endl;
-      }
-      return true;
-    }
-    return false;
-  }
-
- private:
-  int32_t mBatch;
-};
-
-//!
-//! \class EnqueueExplicit
-//! \brief Functor to enqueue inference with explict batch
-//!
-class EnqueueExplicit : private Enqueue {
- public:
-  explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
-      : Enqueue(context, buffers) {}
-
-  bool operator()(TrtCudaStream& stream) const {
-    if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
-      // Collecting layer timing info from current profile index of execution
-      // context
-      if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
-          !mContext.reportToProfiler()) {
-        gLogWarning
-            << "Failed to collect layer timing info from previous enqueueV2()"
-            << std::endl;
-      }
-      return true;
-    }
-    return false;
-  }
-};
-
-//!
-//! \class EnqueueGraph
-//! \brief Functor to enqueue inference from CUDA Graph
-//!
-class EnqueueGraph {
- public:
-  explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
-                        TrtCudaGraph& graph)
-      : mGraph(graph), mContext(context) {}
-
-  bool operator()(TrtCudaStream& stream) const {
-    if (mGraph.launch(stream)) {
-      // Collecting layer timing info from current profile index of execution
-      // context
-      if (mContext.getProfiler() && !mContext.reportToProfiler()) {
-        gLogWarning << "Failed to collect layer timing info from previous CUDA "
-                       "graph launch"
-                    << std::endl;
-      }
-      return true;
-    }
-    return false;
-  }
-
-  TrtCudaGraph& mGraph;
-  nvinfer1::IExecutionContext& mContext;
-};
-
-//!
-//! \class EnqueueSafe
-//! \brief Functor to enqueue safe execution context
-//!
-class EnqueueSafe {
- public:
-  explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
-                       void** buffers)
-      : mContext(context), mBuffers(buffers) {}
-
-  bool operator()(TrtCudaStream& stream) const {
-    if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
-      return true;
-    }
-    return false;
-  }
-
-  nvinfer1::safe::IExecutionContext& mContext;
-  void** mBuffers{};
-};
-
-using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
-
-enum class StreamType : int32_t {
-  kINPUT = 0,
-  kCOMPUTE = 1,
-  kOUTPUT = 2,
-  kNUM = 3
-};
-
-enum class EventType : int32_t {
-  kINPUT_S = 0,
-  kINPUT_E = 1,
-  kCOMPUTE_S = 2,
-  kCOMPUTE_E = 3,
-  kOUTPUT_S = 4,
-  kOUTPUT_E = 5,
-  kNUM = 6
-};
-
-using MultiStream =
-    std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
-
-using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
-                              static_cast<int32_t>(EventType::kNUM)>;
-
-using EnqueueTimes = std::array<TimePoint, 2>;
-
-//!
-//! \class Iteration
-//! \brief Inference iteration and streams management
-//!
-template <class ContextType> class Iteration {
- public:
-  Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
-            Bindings& bindings)
-      : mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
-        mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
-        mContext(&context) {
-    for (int32_t d = 0; d < mDepth; ++d) {
-      for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
-        mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
-      }
-    }
-    createEnqueueFunction(inference, context, bindings);
-  }
-
-  bool query(bool skipTransfers) {
-    if (mActive[mNext]) {
-      return true;
-    }
-
-    if (!skipTransfers) {
-      record(EventType::kINPUT_S, StreamType::kINPUT);
-      mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
-      record(EventType::kINPUT_E, StreamType::kINPUT);
-      wait(EventType::kINPUT_E,
-           StreamType::kCOMPUTE); // Wait for input DMA before compute
-    }
-
-    record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
-    recordEnqueueTime();
-    if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
-      return false;
-    }
-    recordEnqueueTime();
-    record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
-
-    if (!skipTransfers) {
-      wait(EventType::kCOMPUTE_E,
-           StreamType::kOUTPUT); // Wait for compute before output DMA
-      record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
-      mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
-      record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
-    }
-
-    mActive[mNext] = true;
-    moveNext();
-    return true;
-  }
-
-  float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
-             std::vector<InferenceTrace>& trace, bool skipTransfers) {
-    if (mActive[mNext]) {
-      if (skipTransfers) {
-        getEvent(EventType::kCOMPUTE_E).synchronize();
-      } else {
-        getEvent(EventType::kOUTPUT_E).synchronize();
-      }
-      trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
-      mActive[mNext] = false;
-      return getEvent(EventType::kCOMPUTE_S) - gpuStart;
-    }
-    return 0;
-  }
-
-  void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
-               std::vector<InferenceTrace>& trace, bool skipTransfers) {
-    for (int32_t d = 0; d < mDepth; ++d) {
-      sync(cpuStart, gpuStart, trace, skipTransfers);
-      moveNext();
-    }
-  }
-
-  void wait(TrtCudaEvent& gpuStart) {
-    getStream(StreamType::kINPUT).wait(gpuStart);
-  }
-
-  void setInputData() {
-    mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
-  }
-
-  void fetchOutputData() {
-    mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
-  }
-
- private:
-  void moveNext() { mNext = mDepth - 1 - mNext; }
-
-  TrtCudaStream& getStream(StreamType t) {
-    return mStream[static_cast<int32_t>(t)];
-  }
-
-  TrtCudaEvent& getEvent(EventType t) {
-    return *mEvents[mNext][static_cast<int32_t>(t)];
-  }
-
-  void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }
-
-  void recordEnqueueTime() {
-    mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
-    enqueueStart = 1 - enqueueStart;
-  }
-
-  TimePoint getEnqueueTime(bool start) {
-    return mEnqueueTimes[mNext][start ? 0 : 1];
-  }
-
-  void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }
-
-  InferenceTrace getTrace(const TimePoint& cpuStart,
-                          const TrtCudaEvent& gpuStart, bool skipTransfers) {
-    float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
-                             : getEvent(EventType::kINPUT_S) - gpuStart;
-    float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
-                             : getEvent(EventType::kINPUT_E) - gpuStart;
-    float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
-                             : getEvent(EventType::kOUTPUT_S) - gpuStart;
-    float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
-                             : getEvent(EventType::kOUTPUT_E) - gpuStart;
-
-    return InferenceTrace(mStreamId,
-                          std::chrono::duration<float, std::milli>(
-                              getEnqueueTime(true) - cpuStart)
-                              .count(),
-                          std::chrono::duration<float, std::milli>(
-                              getEnqueueTime(false) - cpuStart)
-                              .count(),
-                          is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
-                          getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
-  }
-
-  void createEnqueueFunction(const InferenceOptions& inference,
-                             nvinfer1::IExecutionContext& context,
-                             Bindings& bindings) {
-    if (inference.batch) {
-      mEnqueue = EnqueueFunction(EnqueueImplicit(
-          context, mBindings.getDeviceBuffers(), inference.batch));
-    } else {
-      mEnqueue = EnqueueFunction(
-          EnqueueExplicit(context, mBindings.getDeviceBuffers()));
-    }
-    if (inference.graph) {
-      TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
-      // Avoid capturing initialization calls by executing the enqueue function
-      // at least
-      // once before starting CUDA graph capture.
-      const auto ret = mEnqueue(stream);
-      assert(ret);
-      stream.synchronize();
-
-      mGraph.beginCapture(stream);
-      // The built TRT engine may contain operations that are not permitted
-      // under CUDA graph capture mode.
-      // When the stream is capturing, the enqueue call may return false if the
-      // current CUDA graph capture fails.
-      if (mEnqueue(stream)) {
-        mGraph.endCapture(stream);
-        mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
-      } else {
-        mGraph.endCaptureOnError(stream);
-        // Ensure any CUDA error has been cleaned up.
-        cudaCheck(cudaGetLastError());
-        sample::gLogWarning << "The built TensorRT engine contains operations "
-                               "that are not permitted under "
-                               "CUDA graph capture mode."
-                            << std::endl;
-        sample::gLogWarning << "The specified --useCudaGraph flag has been "
-                               "ignored. The inference will be "
-                               "launched without using CUDA graph launch."
-                            << std::endl;
-      }
-    }
-  }
-
-  void createEnqueueFunction(const InferenceOptions&,
-                             nvinfer1::safe::IExecutionContext& context,
-                             Bindings&) {
-    mEnqueue =
-        EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
-  }
-
-  Bindings& mBindings;
-
-  TrtCudaGraph mGraph;
-  EnqueueFunction mEnqueue;
-
-  int32_t mStreamId{0};
-  int32_t mNext{0};
-  int32_t mDepth{2}; // default to double buffer to hide DMA transfers
-
-  std::vector<bool> mActive;
-  MultiStream mStream;
-  std::vector<MultiEvent> mEvents;
-
-  int32_t enqueueStart{0};
-  std::vector<EnqueueTimes> mEnqueueTimes;
-  ContextType* mContext{nullptr};
-};
-
-template <class ContextType>
-bool inferenceLoop(
-    std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
-    const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
-    float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
-    bool skipTransfers, float idleMs) {
-  float durationMs = 0;
-  int32_t skip = 0;
-
-  for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
-       ++i) {
-    for (auto& s : iStreams) {
-      if (!s->query(skipTransfers)) {
-        return false;
-      }
-    }
-    for (auto& s : iStreams) {
-      durationMs = std::max(durationMs,
-                            s->sync(cpuStart, gpuStart, trace, skipTransfers));
-    }
-    if (durationMs < warmupMs) // Warming up
-    {
-      if (durationMs) // Skip complete iterations
-      {
-        ++skip;
-      }
-      continue;
-    }
-    if (idleMs != 0.F) {
-      std::this_thread::sleep_for(
-          std::chrono::duration<float, std::milli>(idleMs));
-    }
-  }
-  for (auto& s : iStreams) {
-    s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
-  }
-  return true;
-}
-
-template <class ContextType>
-void inferenceExecution(const InferenceOptions& inference,
-                        InferenceEnvironment& iEnv, SyncStruct& sync,
-                        const int32_t threadIdx, const int32_t streamsPerThread,
-                        int32_t device, std::vector<InferenceTrace>& trace) {
-  float warmupMs = inference.warmup;
-  float durationMs = inference.duration * 1000.F + warmupMs;
-
-  cudaCheck(cudaSetDevice(device));
-
-  std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;
-
-  for (int32_t s = 0; s < streamsPerThread; ++s) {
-    const int32_t streamId{threadIdx * streamsPerThread + s};
-    auto* iteration = new Iteration<ContextType>(
-        streamId, inference, *iEnv.template getContext<ContextType>(streamId),
-        *iEnv.bindings[streamId]);
-    if (inference.skipTransfers) {
-      iteration->setInputData();
-    }
-    iStreams.emplace_back(iteration);
-  }
-
-  for (auto& s : iStreams) {
-    s->wait(sync.gpuStart);
-  }
-
-  std::vector<InferenceTrace> localTrace;
-  if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
-                     inference.iterations, durationMs, warmupMs, localTrace,
-                     inference.skipTransfers, inference.idle)) {
-    iEnv.error = true;
-  }
-
-  if (inference.skipTransfers) {
-    for (auto& s : iStreams) {
-      s->fetchOutputData();
-    }
-  }
-
-  sync.mutex.lock();
-  trace.insert(trace.end(), localTrace.begin(), localTrace.end());
-  sync.mutex.unlock();
-}
-
-inline std::thread makeThread(const InferenceOptions& inference,
-                              InferenceEnvironment& iEnv, SyncStruct& sync,
-                              int32_t threadIdx, int32_t streamsPerThread,
-                              int32_t device,
-                              std::vector<InferenceTrace>& trace) {
-  if (iEnv.safe) {
-    ASSERT(sample::hasSafeRuntime());
-    return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
-                       std::cref(inference), std::ref(iEnv), std::ref(sync),
-                       threadIdx, streamsPerThread, device, std::ref(trace));
-  }
-
-  return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
-                     std::cref(inference), std::ref(iEnv), std::ref(sync),
-                     threadIdx, streamsPerThread, device, std::ref(trace));
-}
-
-} // namespace
-
-bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
-                  int32_t device, std::vector<InferenceTrace>& trace) {
-  cudaCheck(cudaProfilerStart());
-
-  trace.resize(0);
-
-  SyncStruct sync;
-  sync.sleep = inference.sleep;
-  sync.mainStream.sleep(&sync.sleep);
-  sync.cpuStart = getCurrentTime();
-  sync.gpuStart.record(sync.mainStream);
-
-  // When multiple streams are used, trtexec can run inference in two modes:
-  // (1) if inference.threads is true, then run each stream on each thread.
-  // (2) if inference.threads is false, then run all streams on the same thread.
-  const int32_t numThreads = inference.threads ? inference.streams : 1;
-  const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;
-
-  std::vector<std::thread> threads;
-  for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
-    threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
-                                    streamsPerThread, device, trace));
-  }
-  for (auto& th : threads) {
-    th.join();
-  }
-
-  cudaCheck(cudaProfilerStop());
-
-  auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
-    return a.h2dStart < b.h2dStart;
-  };
-  std::sort(trace.begin(), trace.end(), cmpTrace);
-
-  return !iEnv.error;
-}
-
-namespace {
-size_t reportGpuMemory() {
-  static size_t prevFree{0};
-  size_t free{0};
-  size_t total{0};
-  size_t newlyAllocated{0};
-  cudaCheck(cudaMemGetInfo(&free, &total));
-  sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
-  if (prevFree != 0) {
-    newlyAllocated = (prevFree - free);
-    sample::gLogInfo << ", newly allocated GPU memory = "
-                     << newlyAllocated / 1024.0_MiB << " GiB";
-  }
-  sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
-                   << std::endl;
-  prevFree = free;
-  return newlyAllocated;
-}
-} // namespace
-
-//! Returns true if deserialization is slower than expected or fails.
-bool timeDeserialize(InferenceEnvironment& iEnv) {
-  constexpr int32_t kNB_ITERS{20};
-  std::unique_ptr<IRuntime> rt{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
-  std::unique_ptr<ICudaEngine> engine;
-
-  std::unique_ptr<safe::IRuntime> safeRT{
-      sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
-  std::unique_ptr<safe::ICudaEngine> safeEngine;
-
-  if (iEnv.safe) {
-    ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
-    safeRT->setErrorRecorder(&gRecorder);
-  }
-
-  auto timeDeserializeFn = [&]() -> float {
-    bool deserializeOK{false};
-    engine.reset(nullptr);
-    safeEngine.reset(nullptr);
-    auto startClock = std::chrono::high_resolution_clock::now();
-    if (iEnv.safe) {
-      safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
-                                                     iEnv.engineBlob.size()));
-      deserializeOK = (safeEngine != nullptr);
-    } else {
-      engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
-                                             iEnv.engineBlob.size(), nullptr));
-      deserializeOK = (engine != nullptr);
-    }
-    auto endClock = std::chrono::high_resolution_clock::now();
-    // return NAN if deserialization failed.
-    return deserializeOK
-               ? std::chrono::duration<float, std::milli>(endClock - startClock)
-                     .count()
-               : NAN;
-  };
-
-  // Warmup the caches to make sure that cache thrashing isn't throwing off the
-  // results
-  {
-    sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
-    for (int32_t i = 0, e = 2; i < e; ++i) {
-      timeDeserializeFn();
-    }
-  }
-  sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
-  float const first = timeDeserializeFn();
-
-  // Check if first deserialization suceeded.
-  if (std::isnan(first)) {
-    sample::gLogError << "Engine deserialization failed." << std::endl;
-    return true;
-  }
-
-  sample::gLogInfo << "First deserialization time = " << first
-                   << " milliseconds" << std::endl;
-
-  // Record initial gpu memory state.
-  reportGpuMemory();
-
-  float totalTime{0.F};
-  for (int32_t i = 0; i < kNB_ITERS; ++i) {
-    totalTime += timeDeserializeFn();
-  }
-  const auto averageTime = totalTime / kNB_ITERS;
-  // reportGpuMemory sometimes reports zero after a single deserialization of a
-  // small engine,
-  // so use the size of memory for all the iterations.
-  const auto totalEngineSizeGpu = reportGpuMemory();
-  sample::gLogInfo << "Total deserialization time = " << totalTime
-                   << " milliseconds in " << kNB_ITERS
-                   << " iterations, average time = " << averageTime
-                   << " milliseconds, first time = " << first
-                   << " milliseconds." << std::endl;
-  sample::gLogInfo << "Deserialization Bandwidth = "
-                   << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
-                   << std::endl;
-
-  // If the first deserialization is more than tolerance slower than
-  // the average deserialization, return true, which means an error occurred.
-  // The tolerance is set to 2x since the deserialization time is quick and
-  // susceptible
-  // to caching issues causing problems in the first timing.
-  const auto tolerance = 2.0F;
-  const bool isSlowerThanExpected = first > averageTime * tolerance;
-  if (isSlowerThanExpected) {
-    sample::gLogInfo << "First deserialization time divided by average time is "
-                     << (first / averageTime) << ". Exceeds tolerance of "
-                     << tolerance << "x." << std::endl;
-  }
-  return isSlowerThanExpected;
-}
-
-std::string getLayerInformation(const InferenceEnvironment& iEnv,
-                                nvinfer1::LayerInformationFormat format) {
-  auto runtime = std::unique_ptr<IRuntime>(
-      createInferRuntime(sample::gLogger.getTRTLogger()));
-  auto inspector =
-      std::unique_ptr<IEngineInspector>(iEnv.engine->createEngineInspector());
-  if (!iEnv.context.empty()) {
-    inspector->setExecutionContext(iEnv.context.front().get());
-  }
-  std::string result = inspector->getEngineInformation(format);
-  return result;
-}
-
-} // namespace sample
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h
deleted file mode 100644
index 700dc8bef..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TRT_SAMPLE_INFERENCE_H
-#define TRT_SAMPLE_INFERENCE_H
-
-#include "sampleReporting.h"
-#include "sampleUtils.h"
-
-#include <iostream>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "NvInfer.h"
-#include "NvInferSafeRuntime.h"
-
-namespace sample {
-
-struct InferenceEnvironment {
-  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
-  std::unique_ptr<Profiler> profiler;
-  std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
-  std::vector<std::unique_ptr<Bindings>> bindings;
-  bool error{false};
-
-  std::vector<uint8_t> engineBlob;
-
-  bool safe{false};
-  std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
-  std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;
-
-  template <class ContextType>
-  inline ContextType* getContext(int32_t streamIdx);
-};
-
-template <>
-inline nvinfer1::IExecutionContext*
-InferenceEnvironment::getContext(int32_t streamIdx) {
-  return context[streamIdx].get();
-}
-
-template <>
-inline nvinfer1::safe::IExecutionContext*
-InferenceEnvironment::getContext(int32_t streamIdx) {
-  return safeContext[streamIdx].get();
-}
-
-//!
-//! \brief Set up contexts and bindings for inference
-//!
-bool setUpInference(InferenceEnvironment& iEnv,
-                    const InferenceOptions& inference);
-
-//!
-//! \brief Deserialize the engine and time how long it takes.
-//!
-bool timeDeserialize(InferenceEnvironment& iEnv);
-
-//!
-//! \brief Run inference and collect timing, return false if any error hit
-//! during inference
-//!
-bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
-                  int32_t device, std::vector<InferenceTrace>& trace);
-
-//!
-//! \brief Get layer information of the engine.
-//!
-std::string getLayerInformation(const InferenceEnvironment& iEnv,
-                                nvinfer1::LayerInformationFormat format);
-
-} // namespace sample
-
-#endif // TRT_SAMPLE_INFERENCE_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp
deleted file mode 100644
index a01b4dfde..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp
+++ /dev/null
@@ -1,1634 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <cctype>
-#include <cstring>
-#include <functional>
-#include <iostream>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-#include "NvInfer.h"
-
-#include "logger.h"
-#include "sampleOptions.h"
-
-namespace sample {
-
-namespace {
-
-std::vector<std::string> splitToStringVec(const std::string& option,
-                                          char separator) {
-  std::vector<std::string> options;
-
-  for (size_t start = 0; start < option.length();) {
-    size_t separatorIndex = option.find(separator, start);
-    if (separatorIndex == std::string::npos) {
-      separatorIndex = option.length();
-    }
-    options.emplace_back(option.substr(start, separatorIndex - start));
-    start = separatorIndex + 1;
-  }
-
-  return options;
-}
-
-template <typename T> T stringToValue(const std::string& option) {
-  return T{option};
-}
-
-template <> int32_t stringToValue<int32_t>(const std::string& option) {
-  return std::stoi(option);
-}
-
-template <> float stringToValue<float>(const std::string& option) {
-  return std::stof(option);
-}
-
-template <> double stringToValue<double>(const std::string& option) {
-  return std::stod(option);
-}
-
-template <> bool stringToValue<bool>(const std::string& option) { return true; }
-
-template <>
-std::vector<int32_t>
-stringToValue<std::vector<int32_t>>(const std::string& option) {
-  std::vector<int32_t> shape;
-  std::vector<std::string> dimsStrings = splitToStringVec(option, 'x');
-  for (const auto& d : dimsStrings) {
-    shape.push_back(stringToValue<int32_t>(d));
-  }
-  return shape;
-}
-
-template <>
-nvinfer1::DataType
-stringToValue<nvinfer1::DataType>(const std::string& option) {
-  const std::unordered_map<std::string, nvinfer1::DataType> strToDT{
-      {"fp32", nvinfer1::DataType::kFLOAT},
-      {"fp16", nvinfer1::DataType::kHALF},
-      {"int8", nvinfer1::DataType::kINT8},
-      {"int32", nvinfer1::DataType::kINT32}};
-  const auto& dt = strToDT.find(option);
-  if (dt == strToDT.end()) {
-    throw std::invalid_argument("Invalid DataType " + option);
-  }
-  return dt->second;
-}
-
-template <>
-nvinfer1::TensorFormats
-stringToValue<nvinfer1::TensorFormats>(const std::string& option) {
-  std::vector<std::string> optionStrings = splitToStringVec(option, '+');
-  const std::unordered_map<std::string, nvinfer1::TensorFormat> strToFmt{
-      {"chw", nvinfer1::TensorFormat::kLINEAR},
-      {"chw2", nvinfer1::TensorFormat::kCHW2},
-      {"chw4", nvinfer1::TensorFormat::kCHW4},
-      {"hwc8", nvinfer1::TensorFormat::kHWC8},
-      {"chw16", nvinfer1::TensorFormat::kCHW16},
-      {"chw32", nvinfer1::TensorFormat::kCHW32},
-      {"dhwc8", nvinfer1::TensorFormat::kDHWC8},
-      {"hwc", nvinfer1::TensorFormat::kHWC},
-      {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR},
-      {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}};
-  nvinfer1::TensorFormats formats{};
-  for (auto f : optionStrings) {
-    const auto& tf = strToFmt.find(f);
-    if (tf == strToFmt.end()) {
-      throw std::invalid_argument(std::string("Invalid TensorFormat ") + f);
-    }
-    formats |= 1U << static_cast<int32_t>(tf->second);
-  }
-
-  return formats;
-}
-
-template <> IOFormat stringToValue<IOFormat>(const std::string& option) {
-  IOFormat ioFormat{};
-  const size_t colon = option.find(':');
-
-  if (colon == std::string::npos) {
-    throw std::invalid_argument(std::string("Invalid IOFormat ") + option);
-  }
-
-  ioFormat.first = stringToValue<nvinfer1::DataType>(option.substr(0, colon));
-  ioFormat.second =
-      stringToValue<nvinfer1::TensorFormats>(option.substr(colon + 1));
-
-  return ioFormat;
-}
-
-template <typename T>
-std::pair<std::string, T> splitNameAndValue(const std::string& s) {
-  std::string tensorName;
-  std::string valueString;
-  // Split on the last :
-  std::vector<std::string> nameRange{splitToStringVec(s, ':')};
-  // Everything before the last : is the name
-  tensorName = nameRange[0];
-  for (size_t i = 1; i < nameRange.size() - 1; i++) {
-    tensorName += ":" + nameRange[i];
-  }
-  // Value is the string element after the last :
-  valueString = nameRange[nameRange.size() - 1];
-  return std::pair<std::string, T>(tensorName, stringToValue<T>(valueString));
-}
-
-template <typename T>
-void splitInsertKeyValue(const std::vector<std::string>& kvList, T& map) {
-  for (const auto& kv : kvList) {
-    map.insert(splitNameAndValue<typename T::mapped_type>(kv));
-  }
-}
-
-const char* boolToEnabled(bool enable) {
-  return enable ? "Enabled" : "Disabled";
-}
-
-//! Check if input option exists in input arguments.
-//! If it does: return its value, erase the argument and return true.
-//! If it does not: return false.
-template <typename T>
-bool getAndDelOption(Arguments& arguments, const std::string& option,
-                     T& value) {
-  const auto match = arguments.find(option);
-  if (match != arguments.end()) {
-    value = stringToValue<T>(match->second);
-    arguments.erase(match);
-    return true;
-  }
-
-  return false;
-}
-
-//! Check if input option exists in input arguments.
-//! If it does: return false in value, erase the argument and return true.
-//! If it does not: return false.
-bool getAndDelNegOption(Arguments& arguments, const std::string& option,
-                        bool& value) {
-  bool dummy;
-  if (getAndDelOption(arguments, option, dummy)) {
-    value = false;
-    return true;
-  }
-  return false;
-}
-
-//! Check if input option exists in input arguments.
-//! If it does: add all the matched arg values to values vector, erase the
-//! argument and return true.
-//! If it does not: return false.
-template <typename T>
-bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option,
-                             std::vector<T>& values) {
-  const auto match = arguments.equal_range(option);
-  if (match.first == match.second) {
-    return false;
-  }
-
-  auto addToValues = [&values](Arguments::value_type& argValue) {
-    values.emplace_back(stringToValue<T>(argValue.second));
-  };
-  std::for_each(match.first, match.second, addToValues);
-  arguments.erase(match.first, match.second);
-
-  return true;
-}
-
-void insertShapesBuild(std::unordered_map<std::string, ShapeRange>& shapes,
-                       nvinfer1::OptProfileSelector selector,
-                       const std::string& name,
-                       const std::vector<int32_t>& dims) {
-  shapes[name][static_cast<size_t>(selector)] = dims;
-}
-
-void insertShapesInference(
-    std::unordered_map<std::string, std::vector<int32_t>>& shapes,
-    const std::string& name, const std::vector<int32_t>& dims) {
-  shapes[name] = dims;
-}
-
-std::string removeSingleQuotationMarks(std::string& str) {
-  std::vector<std::string> strList{splitToStringVec(str, '\'')};
-  // Remove all the escaped single quotation marks
-  std::string retVal = "";
-  // Do not really care about unterminated sequences
-  for (size_t i = 0; i < strList.size(); i++) {
-    retVal += strList[i];
-  }
-  return retVal;
-}
-
-void getLayerPrecisions(Arguments& arguments, char const* argument,
-                        LayerPrecisions& layerPrecisions) {
-  std::string list;
-  if (!getAndDelOption(arguments, argument, list)) {
-    return;
-  }
-
-  // The layerPrecisions flag contains comma-separated layerName:precision
-  // pairs.
-  std::vector<std::string> precisionList{splitToStringVec(list, ',')};
-  for (auto const& s : precisionList) {
-    auto namePrecisionPair = splitNameAndValue<nvinfer1::DataType>(s);
-    auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first);
-    layerPrecisions[layerName] = namePrecisionPair.second;
-  }
-}
-
-void getLayerOutputTypes(Arguments& arguments, char const* argument,
-                         LayerOutputTypes& layerOutputTypes) {
-  std::string list;
-  if (!getAndDelOption(arguments, argument, list)) {
-    return;
-  }
-
-  // The layerOutputTypes flag contains comma-separated layerName:types pairs.
-  std::vector<std::string> precisionList{splitToStringVec(list, ',')};
-  for (auto const& s : precisionList) {
-    auto namePrecisionPair = splitNameAndValue<std::string>(s);
-    auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first);
-    auto const typeStrings = splitToStringVec(namePrecisionPair.second, '+');
-    std::vector<nvinfer1::DataType> typeVec(typeStrings.size(),
-                                            nvinfer1::DataType::kFLOAT);
-    std::transform(typeStrings.begin(), typeStrings.end(), typeVec.begin(),
-                   stringToValue<nvinfer1::DataType>);
-    layerOutputTypes[layerName] = typeVec;
-  }
-}
-
-bool getShapesBuild(Arguments& arguments,
-                    std::unordered_map<std::string, ShapeRange>& shapes,
-                    char const* argument,
-                    nvinfer1::OptProfileSelector selector) {
-  std::string list;
-  bool retVal = getAndDelOption(arguments, argument, list);
-  std::vector<std::string> shapeList{splitToStringVec(list, ',')};
-  for (const auto& s : shapeList) {
-    auto nameDimsPair = splitNameAndValue<std::vector<int32_t>>(s);
-    auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
-    auto dims = nameDimsPair.second;
-    insertShapesBuild(shapes, selector, tensorName, dims);
-  }
-  return retVal;
-}
-
-bool getShapesInference(
-    Arguments& arguments,
-    std::unordered_map<std::string, std::vector<int32_t>>& shapes,
-    const char* argument) {
-  std::string list;
-  bool retVal = getAndDelOption(arguments, argument, list);
-  std::vector<std::string> shapeList{splitToStringVec(list, ',')};
-  for (const auto& s : shapeList) {
-    auto nameDimsPair = splitNameAndValue<std::vector<int32_t>>(s);
-    auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
-    auto dims = nameDimsPair.second;
-    insertShapesInference(shapes, tensorName, dims);
-  }
-  return retVal;
-}
-
-void processShapes(std::unordered_map<std::string, ShapeRange>& shapes,
-                   bool minShapes, bool optShapes, bool maxShapes, bool calib) {
-  // Only accept optShapes only or all three of minShapes, optShapes, maxShapes
-  if (((minShapes || maxShapes) && !optShapes)   // minShapes only, maxShapes
-                                                 // only, both minShapes and
-                                                 // maxShapes
-      || (minShapes && !maxShapes && optShapes)  // both minShapes and optShapes
-      || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes
-  {
-    if (calib) {
-      throw std::invalid_argument(
-          "Must specify only --optShapesCalib or all of --minShapesCalib, "
-          "--optShapesCalib, --maxShapesCalib");
-    } else {
-      throw std::invalid_argument(
-          "Must specify only --optShapes or all of --minShapes, --optShapes, "
-          "--maxShapes");
-    }
-  }
-
-  // If optShapes only, expand optShapes to minShapes and maxShapes
-  if (optShapes && !minShapes && !maxShapes) {
-    std::unordered_map<std::string, ShapeRange> newShapes;
-    for (auto& s : shapes) {
-      insertShapesBuild(
-          newShapes, nvinfer1::OptProfileSelector::kMIN, s.first,
-          s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
-      insertShapesBuild(
-          newShapes, nvinfer1::OptProfileSelector::kOPT, s.first,
-          s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
-      insertShapesBuild(
-          newShapes, nvinfer1::OptProfileSelector::kMAX, s.first,
-          s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
-    }
-    shapes = newShapes;
-  }
-}
-
-template <typename T>
-void printShapes(std::ostream& os, const char* phase, const T& shapes) {
-  if (shapes.empty()) {
-    os << "Input " << phase << " shapes: model" << std::endl;
-  } else {
-    for (const auto& s : shapes) {
-      os << "Input " << phase << " shape: " << s.first << "=" << s.second
-         << std::endl;
-    }
-  }
-}
-
-std::ostream& printBatch(std::ostream& os, int32_t maxBatch) {
-  if (maxBatch != maxBatchNotProvided) {
-    os << maxBatch;
-  } else {
-    os << "explicit batch";
-  }
-  return os;
-}
-
-std::ostream& printTacticSources(std::ostream& os,
-                                 nvinfer1::TacticSources enabledSources,
-                                 nvinfer1::TacticSources disabledSources) {
-  if (!enabledSources && !disabledSources) {
-    os << "Using default tactic sources";
-  } else {
-    auto const addSource = [&](uint32_t source, std::string const& name) {
-      if (enabledSources & source) {
-        os << name << " [ON], ";
-      } else if (disabledSources & source) {
-        os << name << " [OFF], ";
-      }
-    };
-
-    addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS),
-              "cublas");
-    addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS_LT),
-              "cublasLt");
-    addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUDNN),
-              "cudnn");
-  }
-  return os;
-}
-
-std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) {
-  os << "FP32";
-  if (options.fp16) {
-    os << "+FP16";
-  }
-  if (options.int8) {
-    os << "+INT8";
-  }
-  if (options.precisionConstraints == PrecisionConstraints::kOBEY) {
-    os << " (obey precision constraints)";
-  }
-  if (options.precisionConstraints == PrecisionConstraints::kPREFER) {
-    os << " (prefer precision constraints)";
-  }
-  return os;
-}
-
-std::ostream& printTimingCache(std::ostream& os, BuildOptions const& options) {
-  switch (options.timingCacheMode) {
-  case TimingCacheMode::kGLOBAL:
-    os << "global";
-    break;
-  case TimingCacheMode::kLOCAL:
-    os << "local";
-    break;
-  case TimingCacheMode::kDISABLE:
-    os << "disable";
-    break;
-  }
-  return os;
-}
-
-std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) {
-  switch (options.sparsity) {
-  case SparsityFlag::kDISABLE:
-    os << "Disabled";
-    break;
-  case SparsityFlag::kENABLE:
-    os << "Enabled";
-    break;
-  case SparsityFlag::kFORCE:
-    os << "Forced";
-    break;
-  }
-
-  return os;
-}
-
-std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) {
-  auto const printValueOrDefault = [&os](double const val) {
-    if (val >= 0) {
-      os << val << " MiB";
-    } else {
-      os << "default";
-    }
-  };
-  os << "workspace: ";
-  printValueOrDefault(options.workspace);
-  os << ", ";
-  os << "dlaSRAM: ";
-  printValueOrDefault(options.dlaSRAM);
-  os << ", ";
-  os << "dlaLocalDRAM: ";
-  printValueOrDefault(options.dlaLocalDRAM);
-  os << ", ";
-  os << "dlaGlobalDRAM: ";
-  printValueOrDefault(options.dlaGlobalDRAM);
-  return os;
-}
-
-} // namespace
-
-Arguments argsToArgumentsMap(int32_t argc, char* argv[]) {
-  Arguments arguments;
-  for (int32_t i = 1; i < argc; ++i) {
-    auto valuePtr = strchr(argv[i], '=');
-    if (valuePtr) {
-      std::string value{valuePtr + 1};
-      arguments.emplace(std::string(argv[i], valuePtr - argv[i]), value);
-    } else {
-      arguments.emplace(argv[i], "");
-    }
-  }
-  return arguments;
-}
-
-void BaseModelOptions::parse(Arguments& arguments) {
-  if (getAndDelOption(arguments, "--onnx", model)) {
-    format = ModelFormat::kONNX;
-  } else if (getAndDelOption(arguments, "--uff", model)) {
-    format = ModelFormat::kUFF;
-  } else if (getAndDelOption(arguments, "--model", model)) {
-    format = ModelFormat::kCAFFE;
-  }
-}
-
-void UffInput::parse(Arguments& arguments) {
-  getAndDelOption(arguments, "--uffNHWC", NHWC);
-  std::vector<std::string> args;
-  if (getAndDelRepeatedOption(arguments, "--uffInput", args)) {
-    for (const auto& i : args) {
-      std::vector<std::string> values{splitToStringVec(i, ',')};
-      if (values.size() == 4) {
-        nvinfer1::Dims3 dims{std::stoi(values[1]), std::stoi(values[2]),
-                             std::stoi(values[3])};
-        inputs.emplace_back(values[0], dims);
-      } else {
-        throw std::invalid_argument(std::string("Invalid uffInput ") + i);
-      }
-    }
-  }
-}
-
-void ModelOptions::parse(Arguments& arguments) {
-  baseModel.parse(arguments);
-
-  switch (baseModel.format) {
-  case ModelFormat::kCAFFE: {
-    getAndDelOption(arguments, "--deploy", prototxt);
-    break;
-  }
-  case ModelFormat::kUFF: {
-    uffInputs.parse(arguments);
-    if (uffInputs.inputs.empty()) {
-      throw std::invalid_argument("Uff models require at least one input");
-    }
-    break;
-  }
-  case ModelFormat::kONNX:
-    break;
-  case ModelFormat::kANY: {
-    if (getAndDelOption(arguments, "--deploy", prototxt)) {
-      baseModel.format = ModelFormat::kCAFFE;
-    }
-    break;
-  }
-  }
-
-  // The --output flag should only be used with Caffe and UFF. It has no effect
-  // on ONNX.
-  std::vector<std::string> outArgs;
-  if (getAndDelRepeatedOption(arguments, "--output", outArgs)) {
-    for (const auto& o : outArgs) {
-      for (auto& v : splitToStringVec(o, ',')) {
-        outputs.emplace_back(std::move(v));
-      }
-    }
-  }
-  if (baseModel.format == ModelFormat::kCAFFE ||
-      baseModel.format == ModelFormat::kUFF) {
-    if (outputs.empty()) {
-      throw std::invalid_argument(
-          "Caffe and Uff models require at least one output");
-    }
-  } else if (baseModel.format == ModelFormat::kONNX) {
-    if (!outputs.empty()) {
-      throw std::invalid_argument(
-          "The --output flag should not be used with ONNX models.");
-    }
-  }
-}
-
-void BuildOptions::parse(Arguments& arguments) {
-  auto getFormats = [&arguments](std::vector<IOFormat>& formatsVector,
-                                 const char* argument) {
-    std::string list;
-    getAndDelOption(arguments, argument, list);
-    std::vector<std::string> formats{splitToStringVec(list, ',')};
-    for (const auto& f : formats) {
-      formatsVector.push_back(stringToValue<IOFormat>(f));
-    }
-  };
-
-  getFormats(inputFormats, "--inputIOFormats");
-  getFormats(outputFormats, "--outputIOFormats");
-
-  bool addedExplicitBatchFlag{false};
-  getAndDelOption(arguments, "--explicitBatch", addedExplicitBatchFlag);
-  if (addedExplicitBatchFlag) {
-    sample::gLogWarning
-        << "--explicitBatch flag has been deprecated and has no effect!"
-        << std::endl;
-    sample::gLogWarning << "Explicit batch dim is automatically enabled if "
-                           "input model is ONNX or if dynamic "
-                        << "shapes are provided when the engine is built."
-                        << std::endl;
-  }
-
-  bool minShapes = getShapesBuild(arguments, shapes, "--minShapes",
-                                  nvinfer1::OptProfileSelector::kMIN);
-  bool optShapes = getShapesBuild(arguments, shapes, "--optShapes",
-                                  nvinfer1::OptProfileSelector::kOPT);
-  bool maxShapes = getShapesBuild(arguments, shapes, "--maxShapes",
-                                  nvinfer1::OptProfileSelector::kMAX);
-  processShapes(shapes, minShapes, optShapes, maxShapes, false);
-  bool minShapesCalib =
-      getShapesBuild(arguments, shapesCalib, "--minShapesCalib",
-                     nvinfer1::OptProfileSelector::kMIN);
-  bool optShapesCalib =
-      getShapesBuild(arguments, shapesCalib, "--optShapesCalib",
-                     nvinfer1::OptProfileSelector::kOPT);
-  bool maxShapesCalib =
-      getShapesBuild(arguments, shapesCalib, "--maxShapesCalib",
-                     nvinfer1::OptProfileSelector::kMAX);
-  processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib,
-                true);
-
-  bool addedExplicitPrecisionFlag{false};
-  getAndDelOption(arguments, "--explicitPrecision", addedExplicitPrecisionFlag);
-  if (addedExplicitPrecisionFlag) {
-    sample::gLogWarning
-        << "--explicitPrecision flag has been deprecated and has no effect!"
-        << std::endl;
-  }
-
-  if (getAndDelOption(arguments, "--workspace", workspace)) {
-    sample::gLogWarning
-        << "--workspace flag has been deprecated by --memPoolSize flag."
-        << std::endl;
-  }
-
-  std::string memPoolSizes;
-  getAndDelOption(arguments, "--memPoolSize", memPoolSizes);
-  std::vector<std::string> memPoolSpecs{splitToStringVec(memPoolSizes, ',')};
-  for (auto const& memPoolSpec : memPoolSpecs) {
-    std::string memPoolName;
-    double memPoolSize;
-    std::tie(memPoolName, memPoolSize) = splitNameAndValue<double>(memPoolSpec);
-    if (memPoolSize < 0) {
-      throw std::invalid_argument(std::string("Negative memory pool size: ") +
-                                  std::to_string(memPoolSize));
-    }
-    if (memPoolName == "workspace") {
-      workspace = memPoolSize;
-    } else if (memPoolName == "dlaSRAM") {
-      dlaSRAM = memPoolSize;
-    } else if (memPoolName == "dlaLocalDRAM") {
-      dlaLocalDRAM = memPoolSize;
-    } else if (memPoolName == "dlaGlobalDRAM") {
-      dlaGlobalDRAM = memPoolSize;
-    } else if (!memPoolName.empty()) {
-      throw std::invalid_argument(std::string("Unknown memory pool: ") +
-                                  memPoolName);
-    }
-  }
-
-  getAndDelOption(arguments, "--maxBatch", maxBatch);
-  getAndDelOption(arguments, "--minTiming", minTiming);
-  getAndDelOption(arguments, "--avgTiming", avgTiming);
-
-  bool best{false};
-  getAndDelOption(arguments, "--best", best);
-  if (best) {
-    int8 = true;
-    fp16 = true;
-  }
-
-  getAndDelOption(arguments, "--refit", refittable);
-  getAndDelNegOption(arguments, "--noTF32", tf32);
-  getAndDelOption(arguments, "--fp16", fp16);
-  getAndDelOption(arguments, "--int8", int8);
-  getAndDelOption(arguments, "--safe", safe);
-  getAndDelOption(arguments, "--consistency", consistency);
-  getAndDelOption(arguments, "--restricted", restricted);
-
-  getAndDelOption(arguments, "--directIO", directIO);
-
-  std::string precisionConstraintsString;
-  getAndDelOption(arguments, "--precisionConstraints",
-                  precisionConstraintsString);
-  if (!precisionConstraintsString.empty()) {
-    const std::unordered_map<std::string, PrecisionConstraints>
-        precisionConstraintsMap = {{"obey", PrecisionConstraints::kOBEY},
-                                   {"prefer", PrecisionConstraints::kPREFER},
-                                   {"none", PrecisionConstraints::kNONE}};
-    auto it = precisionConstraintsMap.find(precisionConstraintsString);
-    if (it == precisionConstraintsMap.end()) {
-      throw std::invalid_argument(
-          std::string("Unknown precision constraints: ") +
-          precisionConstraintsString);
-    }
-    precisionConstraints = it->second;
-  } else {
-    precisionConstraints = PrecisionConstraints::kNONE;
-  }
-
-  getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions);
-  getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes);
-
-  if (layerPrecisions.empty() && layerOutputTypes.empty() &&
-      precisionConstraints != PrecisionConstraints::kNONE) {
-    sample::gLogWarning << "When --precisionConstraints flag is set to "
-                           "\"obey\" or \"prefer\", please add "
-                        << "--layerPrecision/--layerOutputTypes flags to set "
-                           "layer-wise precisions and output "
-                        << "types." << std::endl;
-  } else if ((!layerPrecisions.empty() || !layerOutputTypes.empty()) &&
-             precisionConstraints == PrecisionConstraints::kNONE) {
-    sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no "
-                           "effect when --precisionConstraints "
-                        << "flag is set to \"none\"." << std::endl;
-  }
-
-  std::string sparsityString;
-  getAndDelOption(arguments, "--sparsity", sparsityString);
-  if (sparsityString == "disable") {
-    sparsity = SparsityFlag::kDISABLE;
-  } else if (sparsityString == "enable") {
-    sparsity = SparsityFlag::kENABLE;
-  } else if (sparsityString == "force") {
-    sparsity = SparsityFlag::kFORCE;
-  } else if (!sparsityString.empty()) {
-    throw std::invalid_argument(std::string("Unknown sparsity mode: ") +
-                                sparsityString);
-  }
-
-  bool calibCheck = getAndDelOption(arguments, "--calib", calibration);
-  if (int8 && calibCheck && !shapes.empty() && shapesCalib.empty()) {
-    shapesCalib = shapes;
-  }
-
-  std::string profilingVerbosityString;
-  if (getAndDelOption(arguments, "--nvtxMode", profilingVerbosityString)) {
-    sample::gLogWarning
-        << "--nvtxMode flag has been deprecated by --profilingVerbosity flag."
-        << std::endl;
-  }
-
-  getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString);
-  if (profilingVerbosityString == "layer_names_only") {
-    profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
-  } else if (profilingVerbosityString == "none") {
-    profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE;
-  } else if (profilingVerbosityString == "detailed") {
-    profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED;
-  } else if (profilingVerbosityString == "default") {
-    sample::gLogWarning
-        << "--profilingVerbosity=default has been deprecated by "
-           "--profilingVerbosity=layer_names_only."
-        << std::endl;
-    profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
-  } else if (profilingVerbosityString == "verbose") {
-    sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated "
-                           "by --profilingVerbosity=detailed."
-                        << std::endl;
-    profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED;
-  } else if (!profilingVerbosityString.empty()) {
-    throw std::invalid_argument(std::string("Unknown profilingVerbosity: ") +
-                                profilingVerbosityString);
-  }
-
-  if (getAndDelOption(arguments, "--loadEngine", engine)) {
-    load = true;
-  }
-  if (getAndDelOption(arguments, "--saveEngine", engine)) {
-    save = true;
-  }
-  if (load && save) {
-    throw std::invalid_argument(
-        "Incompatible load and save engine options selected");
-  }
-
-  std::string tacticSourceArgs;
-  if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs)) {
-    std::vector<std::string> tacticList =
-        splitToStringVec(tacticSourceArgs, ',');
-    for (auto& t : tacticList) {
-      bool enable{false};
-      if (t.front() == '+') {
-        enable = true;
-      } else if (t.front() != '-') {
-        throw std::invalid_argument(
-            "Tactic source must be prefixed with + or -, indicating whether it "
-            "should be enabled or disabled "
-            "respectively.");
-      }
-      t.erase(0, 1);
-
-      const auto toUpper = [](std::string& sourceName) {
-        std::transform(sourceName.begin(), sourceName.end(), sourceName.begin(),
-                       [](char c) { return std::toupper(c); });
-        return sourceName;
-      };
-
-      nvinfer1::TacticSource source{};
-      t = toUpper(t);
-      if (t == "CUBLAS") {
-        source = nvinfer1::TacticSource::kCUBLAS;
-      } else if (t == "CUBLASLT" || t == "CUBLAS_LT") {
-        source = nvinfer1::TacticSource::kCUBLAS_LT;
-      } else if (t == "CUDNN") {
-        source = nvinfer1::TacticSource::kCUDNN;
-      } else {
-        throw std::invalid_argument(std::string("Unknown tactic source: ") + t);
-      }
-
-      uint32_t sourceBit = 1U << static_cast<uint32_t>(source);
-
-      if (enable) {
-        enabledTactics |= sourceBit;
-      } else {
-        disabledTactics |= sourceBit;
-      }
-
-      if (enabledTactics & disabledTactics) {
-        throw std::invalid_argument(std::string("Cannot enable and disable ") +
-                                    t);
-      }
-    }
-  }
-
-  bool noBuilderCache{false};
-  getAndDelOption(arguments, "--noBuilderCache", noBuilderCache);
-  getAndDelOption(arguments, "--timingCacheFile", timingCacheFile);
-  if (noBuilderCache) {
-    timingCacheMode = TimingCacheMode::kDISABLE;
-  } else if (!timingCacheFile.empty()) {
-    timingCacheMode = TimingCacheMode::kGLOBAL;
-  } else {
-    timingCacheMode = TimingCacheMode::kLOCAL;
-  }
-}
-
-void SystemOptions::parse(Arguments& arguments) {
-  getAndDelOption(arguments, "--device", device);
-  getAndDelOption(arguments, "--useDLACore", DLACore);
-  getAndDelOption(arguments, "--allowGPUFallback", fallback);
-  std::string pluginName;
-  while (getAndDelOption(arguments, "--plugins", pluginName)) {
-    plugins.emplace_back(pluginName);
-  }
-}
-
-void InferenceOptions::parse(Arguments& arguments) {
-  getAndDelOption(arguments, "--streams", streams);
-  getAndDelOption(arguments, "--iterations", iterations);
-  getAndDelOption(arguments, "--duration", duration);
-  getAndDelOption(arguments, "--warmUp", warmup);
-  getAndDelOption(arguments, "--sleepTime", sleep);
-  getAndDelOption(arguments, "--idleTime", idle);
-  bool exposeDMA{false};
-  if (getAndDelOption(arguments, "--exposeDMA", exposeDMA)) {
-    overlap = !exposeDMA;
-  }
-  getAndDelOption(arguments, "--noDataTransfers", skipTransfers);
-  getAndDelOption(arguments, "--useManagedMemory", useManaged);
-  getAndDelOption(arguments, "--useSpinWait", spin);
-  getAndDelOption(arguments, "--threads", threads);
-  getAndDelOption(arguments, "--useCudaGraph", graph);
-  getAndDelOption(arguments, "--separateProfileRun", rerun);
-  getAndDelOption(arguments, "--buildOnly", skip);
-  getAndDelOption(arguments, "--timeDeserialize", timeDeserialize);
-  getAndDelOption(arguments, "--timeRefit", timeRefit);
-
-  std::string list;
-  getAndDelOption(arguments, "--loadInputs", list);
-  std::vector<std::string> inputsList{splitToStringVec(list, ',')};
-  splitInsertKeyValue(inputsList, inputs);
-
-  getShapesInference(arguments, shapes, "--shapes");
-  getAndDelOption(arguments, "--batch", batch);
-}
-
-void ReportingOptions::parse(Arguments& arguments) {
-  getAndDelOption(arguments, "--percentile", percentile);
-  getAndDelOption(arguments, "--avgRuns", avgs);
-  getAndDelOption(arguments, "--verbose", verbose);
-  getAndDelOption(arguments, "--dumpRefit", refit);
-  getAndDelOption(arguments, "--dumpOutput", output);
-  getAndDelOption(arguments, "--dumpProfile", profile);
-  getAndDelOption(arguments, "--dumpLayerInfo", layerInfo);
-  getAndDelOption(arguments, "--exportTimes", exportTimes);
-  getAndDelOption(arguments, "--exportOutput", exportOutput);
-  getAndDelOption(arguments, "--exportProfile", exportProfile);
-  getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo);
-  if (percentile < 0 || percentile > 100) {
-    throw std::invalid_argument(std::string("Percentile ") +
-                                std::to_string(percentile) +
-                                "is not in [0,100]");
-  }
-}
-
-bool parseHelp(Arguments& arguments) {
-  bool helpLong{false};
-  bool helpShort{false};
-  getAndDelOption(arguments, "--help", helpLong);
-  getAndDelOption(arguments, "-h", helpShort);
-  return helpLong || helpShort;
-}
-
-void AllOptions::parse(Arguments& arguments) {
-  model.parse(arguments);
-  build.parse(arguments);
-  system.parse(arguments);
-  inference.parse(arguments);
-
-  // Use explicitBatch when input model is ONNX or when dynamic shapes are used.
-  const bool isOnnx{model.baseModel.format == ModelFormat::kONNX};
-  const bool hasDynamicShapes{!build.shapes.empty() ||
-                              !inference.shapes.empty()};
-  const bool detectedExplicitBatch = isOnnx || hasDynamicShapes;
-
-  // Throw an error if user tries to use --batch or --maxBatch when the engine
-  // has explicit batch dim.
-  const bool maxBatchWasSet{build.maxBatch != maxBatchNotProvided};
-  const bool batchWasSet{inference.batch != batchNotProvided};
-  if (detectedExplicitBatch && (maxBatchWasSet || batchWasSet)) {
-    throw std::invalid_argument(
-        "The --batch and --maxBatch flags should not be used when the input "
-        "model is ONNX or when dynamic shapes "
-        "are provided. Please use --optShapes and --shapes to set input shapes "
-        "instead.");
-  }
-
-  // If batch and/or maxBatch is not set and the engine has implicit batch dim,
-  // set them to default values.
-  if (!detectedExplicitBatch) {
-    // If batch is not set, set it to default value.
-    if (!batchWasSet) {
-      inference.batch = defaultBatch;
-    }
-    // If maxBatch is not set, set it to be equal to batch.
-    if (!maxBatchWasSet) {
-      build.maxBatch = inference.batch;
-    }
-    // MaxBatch should not be less than batch.
-    if (build.maxBatch < inference.batch) {
-      throw std::invalid_argument(
-          "Build max batch " + std::to_string(build.maxBatch) +
-          " is less than inference batch " + std::to_string(inference.batch));
-    }
-  }
-
-  if (build.shapes.empty() && !inference.shapes.empty()) {
-    // If --shapes are provided but --optShapes are not, assume that optShapes
-    // is the same as shapes.
-    for (auto& s : inference.shapes) {
-      insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMIN,
-                        s.first, s.second);
-      insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kOPT,
-                        s.first, s.second);
-      insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMAX,
-                        s.first, s.second);
-    }
-  } else if (!build.shapes.empty() && inference.shapes.empty()) {
-    // If --optShapes are provided but --shapes are not, assume that shapes is
-    // the same as optShapes.
-    for (auto& s : build.shapes) {
-      insertShapesInference(
-          inference.shapes, s.first,
-          s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
-    }
-  }
-
-  reporting.parse(arguments);
-  helps = parseHelp(arguments);
-
-  if (!helps) {
-    if (!build.load && model.baseModel.format == ModelFormat::kANY) {
-      throw std::invalid_argument("Model missing or format not recognized");
-    }
-    if (build.safe && system.DLACore >= 0) {
-      auto checkSafeDLAFormats = [](std::vector<IOFormat> const& fmt) {
-        return fmt.empty()
-                   ? false
-                   : std::all_of(fmt.begin(), fmt.end(),
-                                 [](IOFormat const& pair) {
-                                   bool supported{false};
-                                   bool const isLINEAR{
-                                       pair.second ==
-                                       1U << static_cast<int32_t>(
-                                           nvinfer1::TensorFormat::kLINEAR)};
-                                   bool const isCHW4{
-                                       pair.second ==
-                                       1U << static_cast<int32_t>(
-                                           nvinfer1::TensorFormat::kCHW4)};
-                                   bool const isCHW32{
-                                       pair.second ==
-                                       1U << static_cast<int32_t>(
-                                           nvinfer1::TensorFormat::kCHW32)};
-                                   bool const isCHW16{
-                                       pair.second ==
-                                       1U << static_cast<int32_t>(
-                                           nvinfer1::TensorFormat::kCHW16)};
-                                   supported |= pair.first ==
-                                                    nvinfer1::DataType::kINT8 &&
-                                                (isLINEAR || isCHW4 || isCHW32);
-                                   supported |= pair.first ==
-                                                    nvinfer1::DataType::kHALF &&
-                                                (isLINEAR || isCHW4 || isCHW16);
-                                   return supported;
-                                 });
-      };
-      if (!checkSafeDLAFormats(build.inputFormats) ||
-          !checkSafeDLAFormats(build.outputFormats)) {
-        throw std::invalid_argument(
-            "I/O formats for safe DLA capability are restricted to "
-            "fp16/int8:linear, fp16:chw16 or int8:chw32");
-      }
-      if (system.fallback) {
-        throw std::invalid_argument(
-            "GPU fallback (--allowGPUFallback) not allowed for safe DLA "
-            "capability");
-      }
-    }
-  }
-}
-
-void SafeBuilderOptions::parse(Arguments& arguments) {
-  auto getFormats = [&arguments](std::vector<IOFormat>& formatsVector,
-                                 const char* argument) {
-    std::string list;
-    getAndDelOption(arguments, argument, list);
-    std::vector<std::string> formats{splitToStringVec(list, ',')};
-    for (const auto& f : formats) {
-      formatsVector.push_back(stringToValue<IOFormat>(f));
-    }
-  };
-
-  getAndDelOption(arguments, "--serialized", serialized);
-  getAndDelOption(arguments, "--onnx", onnxModelFile);
-  getAndDelOption(arguments, "--help", help);
-  getAndDelOption(arguments, "-h", help);
-  getAndDelOption(arguments, "--verbose", verbose);
-  getAndDelOption(arguments, "-v", verbose);
-  getFormats(inputFormats, "--inputIOFormats");
-  getFormats(outputFormats, "--outputIOFormats");
-  getAndDelOption(arguments, "--int8", int8);
-  getAndDelOption(arguments, "--calib", calibFile);
-  getAndDelOption(arguments, "--consistency", consistency);
-  getAndDelOption(arguments, "--std", standard);
-  std::string pluginName;
-  while (getAndDelOption(arguments, "--plugins", pluginName)) {
-    plugins.emplace_back(pluginName);
-  }
-}
-
-std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) {
-  os << "=== Model Options ===" << std::endl;
-
-  os << "Format: ";
-  switch (options.format) {
-  case ModelFormat::kCAFFE: {
-    os << "Caffe";
-    break;
-  }
-  case ModelFormat::kONNX: {
-    os << "ONNX";
-    break;
-  }
-  case ModelFormat::kUFF: {
-    os << "UFF";
-    break;
-  }
-  case ModelFormat::kANY:
-    os << "*";
-    break;
-  }
-  os << std::endl << "Model: " << options.model << std::endl;
-
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const UffInput& input) {
-  os << "Uff Inputs Layout: " << (input.NHWC ? "NHWC" : "NCHW") << std::endl;
-  for (const auto& i : input.inputs) {
-    os << "Input: " << i.first << "," << i.second.d[0] << "," << i.second.d[1]
-       << "," << i.second.d[2] << std::endl;
-  }
-
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const ModelOptions& options) {
-  os << options.baseModel;
-  switch (options.baseModel.format) {
-  case ModelFormat::kCAFFE: {
-    os << "Prototxt: " << options.prototxt << std::endl;
-    break;
-  }
-  case ModelFormat::kUFF: {
-    os << options.uffInputs;
-    break;
-  }
-  case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or
-                           // the generic case
-  case ModelFormat::kANY:
-    break;
-  }
-
-  os << "Output:";
-  for (const auto& o : options.outputs) {
-    os << " " << o;
-  }
-  os << std::endl;
-
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) {
-  switch (dtype) {
-  case nvinfer1::DataType::kFLOAT: {
-    os << "fp32";
-    break;
-  }
-  case nvinfer1::DataType::kHALF: {
-    os << "fp16";
-    break;
-  }
-  case nvinfer1::DataType::kINT8: {
-    os << "int8";
-    break;
-  }
-  case nvinfer1::DataType::kINT32: {
-    os << "int32";
-    break;
-  }
-  case nvinfer1::DataType::kBOOL: {
-    os << "bool";
-    break;
-  }
-  }
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, IOFormat const& format) {
-  os << format.first << ":";
-
-  for (int32_t f = 0; f < nvinfer1::EnumMax<nvinfer1::TensorFormat>(); ++f) {
-    if ((1U << f) & format.second) {
-      if (f) {
-        os << "+";
-      }
-      switch (nvinfer1::TensorFormat(f)) {
-      case nvinfer1::TensorFormat::kLINEAR: {
-        os << "chw";
-        break;
-      }
-      case nvinfer1::TensorFormat::kCHW2: {
-        os << "chw2";
-        break;
-      }
-      case nvinfer1::TensorFormat::kHWC8: {
-        os << "hwc8";
-        break;
-      }
-      case nvinfer1::TensorFormat::kHWC16: {
-        os << "hwc16";
-        break;
-      }
-      case nvinfer1::TensorFormat::kCHW4: {
-        os << "chw4";
-        break;
-      }
-      case nvinfer1::TensorFormat::kCHW16: {
-        os << "chw16";
-        break;
-      }
-      case nvinfer1::TensorFormat::kCHW32: {
-        os << "chw32";
-        break;
-      }
-      case nvinfer1::TensorFormat::kDHWC8: {
-        os << "dhwc8";
-        break;
-      }
-      case nvinfer1::TensorFormat::kCDHW32: {
-        os << "cdhw32";
-        break;
-      }
-      case nvinfer1::TensorFormat::kHWC: {
-        os << "hwc";
-        break;
-      }
-      case nvinfer1::TensorFormat::kDLA_LINEAR: {
-        os << "dla_linear";
-        break;
-      }
-      case nvinfer1::TensorFormat::kDLA_HWC4: {
-        os << "dla_hwc4";
-        break;
-      }
-      }
-    }
-  }
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) {
-  int32_t i = 0;
-  for (const auto& d : dims) {
-    if (!d.size()) {
-      break;
-    }
-    os << (i ? "+" : "") << d;
-    ++i;
-  }
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os,
-                         LayerPrecisions const& layerPrecisions) {
-  int32_t i = 0;
-  for (auto const& layerPrecision : layerPrecisions) {
-    os << (i ? "," : "") << layerPrecision.first << ":"
-       << layerPrecision.second;
-    ++i;
-  }
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const BuildOptions& options) {
-  // clang-format off
-    os << "=== Build Options ==="                                                                                       << std::endl <<
-
-          "Max batch: ";        printBatch(os, options.maxBatch)                                                        << std::endl <<
-          "Memory Pools: ";     printMemoryPools(os, options)                                                           << std::endl <<
-          "minTiming: "      << options.minTiming                                                                       << std::endl <<
-          "avgTiming: "      << options.avgTiming                                                                       << std::endl <<
-          "Precision: ";        printPrecision(os, options)                                                             << std::endl <<
-          "LayerPrecisions: " << options.layerPrecisions                                                                << std::endl <<
-          "Calibration: "    << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl <<
-          "Refit: "          << boolToEnabled(options.refittable)                                                       << std::endl <<
-          "Sparsity: ";         printSparsity(os, options)                                                              << std::endl <<
-          "Safe mode: "      << boolToEnabled(options.safe)                                                             << std::endl <<
-          "DirectIO mode: "  << boolToEnabled(options.directIO)                                                         << std::endl <<
-          "Restricted mode: " << boolToEnabled(options.restricted)                                                      << std::endl <<
-          "Save engine: "    << (options.save ? options.engine : "")                                                    << std::endl <<
-          "Load engine: "    << (options.load ? options.engine : "")                                                    << std::endl <<
-          "Profiling verbosity: " << static_cast<int32_t>(options.profilingVerbosity)                                   << std::endl <<
-          "Tactic sources: ";   printTacticSources(os, options.enabledTactics, options.disabledTactics)                 << std::endl <<
-          "timingCacheMode: ";  printTimingCache(os, options)                                                           << std::endl <<
-          "timingCacheFile: " << options.timingCacheFile                                                                << std::endl;
-  // clang-format on
-
-  auto printIOFormats = [](std::ostream& os, const char* direction,
-                           const std::vector<IOFormat> formats) {
-    if (formats.empty()) {
-      os << direction << "s format: fp32:CHW" << std::endl;
-    } else {
-      for (const auto& f : formats) {
-        os << direction << ": " << f << std::endl;
-      }
-    }
-  };
-
-  printIOFormats(os, "Input(s)", options.inputFormats);
-  printIOFormats(os, "Output(s)", options.outputFormats);
-  printShapes(os, "build", options.shapes);
-  printShapes(os, "calibration", options.shapesCalib);
-
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const SystemOptions& options) {
-  // clang-format off
-    os << "=== System Options ==="                                                                << std::endl <<
-
-          "Device: "  << options.device                                                           << std::endl <<
-          "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "")           <<
-                         (options.DLACore != -1 && options.fallback ? "(With GPU fallback)" : "") << std::endl;
-    os << "Plugins:";
-
-    for (const auto& p : options.plugins)
-    {
-        os << " " << p;
-    }
-    os << std::endl;
-
-    return os;
-  // clang-format on
-}
-
-std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) {
-  // clang-format off
-    os << "=== Inference Options ==="                                     << std::endl <<
-
-          "Batch: ";
-    if (options.batch && options.shapes.empty())
-    {
-                          os << options.batch                             << std::endl;
-    }
-    else
-    {
-                          os << "Explicit"                                << std::endl;
-    }
-    printShapes(os, "inference", options.shapes);
-    os << "Iterations: "         << options.iterations                    << std::endl <<
-          "Duration: "           << options.duration   << "s (+ "
-                                 << options.warmup     << "ms warm up)"   << std::endl <<
-          "Sleep time: "         << options.sleep      << "ms"            << std::endl <<
-          "Idle time: "          << options.idle       << "ms"            << std::endl <<
-          "Streams: "            << options.streams                       << std::endl <<
-          "ExposeDMA: "          << boolToEnabled(!options.overlap)       << std::endl <<
-          "Data transfers: "     << boolToEnabled(!options.skipTransfers) << std::endl <<
-          "Spin-wait: "          << boolToEnabled(options.spin)           << std::endl <<
-          "Multithreading: "     << boolToEnabled(options.threads)        << std::endl <<
-          "CUDA Graph: "         << boolToEnabled(options.graph)          << std::endl <<
-          "Separate profiling: " << boolToEnabled(options.rerun)          << std::endl <<
-          "Time Deserialize: "   << boolToEnabled(options.timeDeserialize) << std::endl <<
-          "Time Refit: "         << boolToEnabled(options.timeRefit) << std::endl <<
-          "Skip inference: "     << boolToEnabled(options.skip)           << std::endl;
-
-  // clang-format on
-  os << "Inputs:" << std::endl;
-  for (const auto& input : options.inputs) {
-    os << input.first << "<-" << input.second << std::endl;
-  }
-
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) {
-  // clang-format off
-    os << "=== Reporting Options ==="                                       << std::endl <<
-
-          "Verbose: "                     << boolToEnabled(options.verbose) << std::endl <<
-          "Averages: "                    << options.avgs << " inferences"  << std::endl <<
-          "Percentile: "                  << options.percentile             << std::endl <<
-          "Dump refittable layers:"       << boolToEnabled(options.refit)   << std::endl <<
-          "Dump output: "                 << boolToEnabled(options.output)  << std::endl <<
-          "Profile: "                     << boolToEnabled(options.profile) << std::endl <<
-          "Export timing to JSON file: "  << options.exportTimes            << std::endl <<
-          "Export output to JSON file: "  << options.exportOutput           << std::endl <<
-          "Export profile to JSON file: " << options.exportProfile          << std::endl;
-  // clang-format on
-
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const AllOptions& options) {
-  os << options.model << options.build << options.system << options.inference
-     << options.reporting << std::endl;
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) {
-  auto printIOFormats = [](std::ostream& os, const char* direction,
-                           const std::vector<IOFormat> formats) {
-    if (formats.empty()) {
-      os << direction << "s format: fp32:CHW" << std::endl;
-    } else {
-      for (const auto& f : formats) {
-        os << direction << ": " << f << std::endl;
-      }
-    }
-  };
-
-  os << "=== Build Options ===" << std::endl;
-  os << "Model ONNX: " << options.onnxModelFile << std::endl;
-
-  os << "Precision: FP16";
-  if (options.int8) {
-    os << " + INT8";
-  }
-  os << std::endl;
-  os << "Calibration file: " << options.calibFile << std::endl;
-  os << "Serialized Network: " << options.serialized << std::endl;
-
-  printIOFormats(os, "Input(s)", options.inputFormats);
-  printIOFormats(os, "Output(s)", options.outputFormats);
-
-  os << "Plugins:";
-  for (const auto& p : options.plugins) {
-    os << " " << p;
-  }
-  os << std::endl;
-  return os;
-}
-
-void BaseModelOptions::help(std::ostream& os) {
-  // clang-format off
-    os << "  --uff=<file>                UFF model"                                             << std::endl <<
-          "  --onnx=<file>               ONNX model"                                            << std::endl <<
-          "  --model=<file>              Caffe model (default = no model, random weights used)" << std::endl;
-  // clang-format on
-}
-
-void UffInput::help(std::ostream& os) {
-  // clang-format off
-    os << "  --uffInput=<name>,X,Y,Z     Input blob name and its dimensions (X,Y,Z=C,H,W), it can be specified "
-                                                       "multiple times; at least one is required for UFF models" << std::endl <<
-          "  --uffNHWC                   Set if inputs are in the NHWC layout instead of NCHW (use "             <<
-                                                                    "X,Y,Z=H,W,C order in --uffInput)"           << std::endl;
-  // clang-format on
-}
-
-void ModelOptions::help(std::ostream& os) {
-  // clang-format off
-    os << "=== Model Options ==="                                                                                 << std::endl;
-    BaseModelOptions::help(os);
-    os << "  --deploy=<file>             Caffe prototxt file"                                                     << std::endl <<
-          "  --output=<name>[,<name>]*   Output names (it can be specified multiple times); at least one output "
-                                                                                  "is required for UFF and Caffe" << std::endl;
-    UffInput::help(os);
-  // clang-format on
-}
-
-void BuildOptions::help(std::ostream& os) {
-  // clang-format off
-    os << "=== Build Options ==="                                                                                                            "\n"
-          "  --maxBatch                  Set max batch size and build an implicit batch engine (default = same size as --batch)"             "\n"
-          "                              This option should not be used when the input model is ONNX or when dynamic shapes are provided."   "\n"
-          "  --minShapes=spec            Build with dynamic shapes using a profile with the min shapes provided"                             "\n"
-          "  --optShapes=spec            Build with dynamic shapes using a profile with the opt shapes provided"                             "\n"
-          "  --maxShapes=spec            Build with dynamic shapes using a profile with the max shapes provided"                             "\n"
-          "  --minShapesCalib=spec       Calibrate with dynamic shapes using a profile with the min shapes provided"                         "\n"
-          "  --optShapesCalib=spec       Calibrate with dynamic shapes using a profile with the opt shapes provided"                         "\n"
-          "  --maxShapesCalib=spec       Calibrate with dynamic shapes using a profile with the max shapes provided"                         "\n"
-          "                              Note: All three of min, opt and max shapes must be supplied."                                       "\n"
-          "                                    However, if only opt shapes is supplied then it will be expanded so"                          "\n"
-          "                                    that min shapes and max shapes are set to the same values as opt shapes."                     "\n"
-          "                                    Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')."                 "\n"
-          "                              Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128"                                   "\n"
-          "                              Each input shape is supplied as a key-value pair where key is the input name and"                   "\n"
-          "                              value is the dimensions (including the batch dimension) to be used for that input."                 "\n"
-          "                              Each key-value pair has the key and value separated using a colon (:)."                             "\n"
-          "                              Multiple input shapes can be provided via comma-separated key-value pairs."                         "\n"
-          "  --inputIOFormats=spec       Type and format of each of the input tensors (default = all inputs in fp32:chw)"                    "\n"
-          "                              See --outputIOFormats help for the grammar of type and format list."                                "\n"
-          "                              Note: If this option is specified, please set comma-separated types and formats for all"            "\n"
-          "                                    inputs following the same order as network inputs ID (even if only one input"                 "\n"
-          "                                    needs specifying IO format) or set the type and format once for broadcasting."                "\n"
-          "  --outputIOFormats=spec      Type and format of each of the output tensors (default = all outputs in fp32:chw)"                  "\n"
-          "                              Note: If this option is specified, please set comma-separated types and formats for all"            "\n"
-          "                                    outputs following the same order as network outputs ID (even if only one output"              "\n"
-          "                                    needs specifying IO format) or set the type and format once for broadcasting."                "\n"
-          "                              IO Formats: spec  ::= IOfmt[\",\"spec]"                                                             "\n"
-          "                                          IOfmt ::= type:fmt"                                                                     "\n"
-          "                                          type  ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\""                                         "\n"
-          "                                          fmt   ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" "\n"
-          "  --workspace=N               Set workspace size in MiB."                                                                         "\n"
-          "  --memPoolSize=poolspec      Specify the size constraints of the designated memory pool(s) in MiB."                              "\n"
-          "                              Note: Also accepts decimal sizes, e.g. 0.25MiB. Will be rounded down to the nearest integer bytes." "\n"
-          "                              Pool constraint: poolspec ::= poolfmt[\",\"poolspec]"                                               "\n"
-          "                                               poolfmt ::= pool:sizeInMiB"                                                        "\n"
-          "                                               pool ::= \"workspace\"|\"dlaSRAM\"|\"dlaLocalDRAM\"|\"dlaGlobalDRAM\""             "\n"
-          "  --profilingVerbosity=mode   Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)"  "\n"
-          "  --minTiming=M               Set the minimum number of iterations used in kernel selection (default = "
-                                                                                                           << defaultMinTiming << ")"        "\n"
-          "  --avgTiming=M               Set the number of times averaged in each iteration for kernel selection (default = "
-                                                                                                           << defaultAvgTiming << ")"        "\n"
-          "  --refit                     Mark the engine as refittable. This will allow the inspection of refittable layers "                "\n"
-          "                              and weights within the engine."                                                                     "\n"
-          "  --sparsity=spec             Control sparsity (default = disabled). "                                                            "\n"
-          "                              Sparsity: spec ::= \"disable\", \"enable\", \"force\""                                              "\n"
-          "                              Note: Description about each of these options is as below"                                          "\n"
-          "                                    disable = do not enable sparse tactics in the builder (this is the default)"                  "\n"
-          "                                    enable  = enable sparse tactics in the builder (but these tactics will only be"               "\n"
-          "                                              considered if the weights have the right sparsity pattern)"                         "\n"
-          "                                    force   = enable sparse tactics in the builder and force-overwrite the weights to have"       "\n"
-          "                                              a sparsity pattern (even if you loaded a model yourself)"                           "\n"
-          "  --noTF32                    Disable tf32 precision (default is to enable tf32, in addition to fp32)"                            "\n"
-          "  --fp16                      Enable fp16 precision, in addition to fp32 (default = disabled)"                                    "\n"
-          "  --int8                      Enable int8 precision, in addition to fp32 (default = disabled)"                                    "\n"
-          "  --best                      Enable all precisions to achieve the best performance (default = disabled)"                         "\n"
-          "  --directIO                  Avoid reformatting at network boundaries. (default = disabled)"                                     "\n"
-          "  --precisionConstraints=spec Control precision constraint setting. (default = none)"                                             "\n"
-          "                                  Precision Constaints: spec ::= \"none\" | \"obey\" | \"prefer\""                                "\n"
-          "                                  none = no constraints"                                                                          "\n"
-          "                                  prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible"    "\n"
-          "                                  obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail"          "\n"
-          "                                         otherwise"                                                                               "\n"
-          "  --layerPrecisions=spec      Control per-layer precision constraints. Effective only when precisionConstraints is set to"        "\n"
-          "                              \"obey\" or \"prefer\". (default = none)"                                                           "\n"
-          "                              The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a"     "\n"
-          "                              layerName to specify the default precision for all the unspecified layers."                         "\n"
-          "                              Per-layer precision spec ::= layerPrecision[\",\"spec]"                                             "\n"
-          "                                                  layerPrecision ::= layerName\":\"precision"                                     "\n"
-          "                                                  precision ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\""                             "\n"
-          "  --layerOutputTypes=spec     Control per-layer output type constraints. Effective only when precisionConstraints is set to"      "\n"
-          "                              \"obey\" or \"prefer\". (default = none)"                                                           "\n"
-          "                              The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a"     "\n"
-          "                              layerName to specify the default precision for all the unspecified layers. If a layer has more than""\n"
-          "                              one output, then multiple types separated by \"+\" can be provided for this layer."                 "\n"
-          "                              Per-layer output type spec ::= layerOutputTypes[\",\"spec]"                                         "\n"
-          "                                                    layerOutputTypes ::= layerName\":\"type"                                      "\n"
-          "                                                    type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"[\"+\"type]"                     "\n"
-          "  --calib=<file>              Read INT8 calibration cache file"                                                                   "\n"
-          "  --safe                      Enable build safety certified engine"                                                               "\n"
-          "  --consistency               Perform consistency checking on safety certified engine"                                            "\n"
-          "  --restricted                Enable safety scope checking with kSAFETY_SCOPE build flag"                                         "\n"
-          "  --saveEngine=<file>         Save the serialized engine"                                                                         "\n"
-          "  --loadEngine=<file>         Load a serialized engine"                                                                           "\n"
-          "  --tacticSources=tactics     Specify the tactics to be used by adding (+) or removing (-) tactics from the default "             "\n"
-          "                              tactic sources (default = all available tactics)."                                                  "\n"
-          "                              Note: Currently only cuDNN, cuBLAS and cuBLAS-LT are listed as optional tactics."                   "\n"
-          "                              Tactic Sources: tactics ::= [\",\"tactic]"                                                          "\n"
-          "                                              tactic  ::= (+|-)lib"                                                               "\n"
-          "                                              lib     ::= \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\""                                     "\n"
-          "                              For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS"                    "\n"
-          "  --noBuilderCache            Disable timing cache in builder (default is to enable timing cache)"                                "\n"
-          "  --timingCacheFile=<file>    Save/load the serialized global timing cache"                                                       "\n"
-          ;
-  // clang-format on
-  os << std::flush;
-}
-
-void SystemOptions::help(std::ostream& os) {
-  // clang-format off
-    os << "=== System Options ==="                                                                         << std::endl <<
-          "  --device=N                  Select cuda device N (default = "         << defaultDevice << ")" << std::endl <<
-          "  --useDLACore=N              Select DLA core N for layers that support DLA (default = none)"   << std::endl <<
-          "  --allowGPUFallback          When DLA is enabled, allow GPU fallback for unsupported layers "
-                                                                                    "(default = disabled)" << std::endl;
-    os << "  --plugins                   Plugin library (.so) to load (can be specified multiple times)"   << std::endl;
-  // clang-format on
-}
-
-void InferenceOptions::help(std::ostream& os) {
-  // clang-format off
-    os << "=== Inference Options ==="                                                                                                << std::endl <<
-          "  --batch=N                   Set batch size for implicit batch engines (default = "              << defaultBatch << ")"  << std::endl <<
-          "                              This option should not be used when the engine is built from an ONNX model or when dynamic" << std::endl <<
-          "                              shapes are provided when the engine is built."                                              << std::endl <<
-          "  --shapes=spec               Set input shapes for dynamic shapes inference inputs."                                      << std::endl <<
-          "                              Note: Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')."         << std::endl <<
-          "                              Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128"                          << std::endl <<
-          "                              Each input shape is supplied as a key-value pair where key is the input name and"           << std::endl <<
-          "                              value is the dimensions (including the batch dimension) to be used for that input."         << std::endl <<
-          "                              Each key-value pair has the key and value separated using a colon (:)."                     << std::endl <<
-          "                              Multiple input shapes can be provided via comma-separated key-value pairs."                 << std::endl <<
-          "  --loadInputs=spec           Load input values from files (default = generate random inputs). Input names can be "
-                                                                                       "wrapped with single quotes (ex: 'Input:0')"  << std::endl <<
-          "                              Input values spec ::= Ival[\",\"spec]"                                                      << std::endl <<
-          "                                           Ival ::= name\":\"file"                                                        << std::endl <<
-          "  --iterations=N              Run at least N inference iterations (default = "               << defaultIterations << ")"  << std::endl <<
-          "  --warmUp=N                  Run for N milliseconds to warmup before measuring performance (default = "
-                                                                                                            << defaultWarmUp << ")"  << std::endl <<
-          "  --duration=N                Run performance measurements for at least N seconds wallclock time (default = "
-                                                                                                          << defaultDuration << ")"  << std::endl <<
-          "  --sleepTime=N               Delay inference start with a gap of N milliseconds between launch and compute "
-                                                                                               "(default = " << defaultSleep << ")"  << std::endl <<
-          "  --idleTime=N                Sleep N milliseconds between two continuous iterations"
-                                                                                               "(default = " << defaultIdle << ")"   << std::endl <<
-          "  --streams=N                 Instantiate N engines to use concurrently (default = "            << defaultStreams << ")"  << std::endl <<
-          "  --exposeDMA                 Serialize DMA transfers to and from device (default = disabled)."                           << std::endl <<
-          "  --noDataTransfers           Disable DMA transfers to and from device (default = enabled)."                              << std::endl <<
-          "  --useManagedMemory          Use managed memory instead of seperate host and device allocations (default = disabled)."   << std::endl <<
-          "  --useSpinWait               Actively synchronize on GPU events. This option may decrease synchronization time but "
-                                                                             "increase CPU usage and power (default = disabled)"     << std::endl <<
-          "  --threads                   Enable multithreading to drive engines with independent threads"
-                                                                                " or speed up refitting (default = disabled) "       << std::endl <<
-          "  --useCudaGraph              Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl <<
-          "                              This flag may be ignored if the graph capture fails."                                       << std::endl <<
-          "  --timeDeserialize           Time the amount of time it takes to deserialize the network and exit."                      << std::endl <<
-          "  --timeRefit                 Time the amount of time it takes to refit the engine before inference."                     << std::endl <<
-          "  --separateProfileRun        Do not attach the profiler in the benchmark run; if profiling is enabled, a second "
-                                                                                "profile run will be executed (default = disabled)"  << std::endl <<
-          "  --buildOnly                 Skip inference perf measurement (default = disabled)"                                       << std::endl;
-  // clang-format on
-}
-
-void ReportingOptions::help(std::ostream& os) {
-  // clang-format off
-    os << "=== Reporting Options ==="                                                                    << std::endl <<
-          "  --verbose                   Use verbose logging (default = false)"                          << std::endl <<
-          "  --avgRuns=N                 Report performance measurements averaged over N consecutive "
-                                                       "iterations (default = " << defaultAvgRuns << ")" << std::endl <<
-          "  --percentile=P              Report performance for the P percentage (0<=P<=100, 0 "
-                                        "representing max perf, and 100 representing min perf; (default"
-                                                                      " = " << defaultPercentile << "%)" << std::endl <<
-          "  --dumpRefit                 Print the refittable layers and weights from a refittable "
-                                        "engine"                                                         << std::endl <<
-          "  --dumpOutput                Print the output tensor(s) of the last inference iteration "
-                                                                                  "(default = disabled)" << std::endl <<
-          "  --dumpProfile               Print profile information per layer (default = disabled)"       << std::endl <<
-          "  --dumpLayerInfo             Print layer information of the engine to console "
-                                                                                "(default = disabled)"   << std::endl <<
-          "  --exportTimes=<file>        Write the timing results in a json file (default = disabled)"   << std::endl <<
-          "  --exportOutput=<file>       Write the output tensors to a json file (default = disabled)"   << std::endl <<
-          "  --exportProfile=<file>      Write the profile information per layer in a json file "
-                                                                              "(default = disabled)"     << std::endl <<
-          "  --exportLayerInfo=<file>    Write the layer information of the engine in a json file "
-                                                                              "(default = disabled)"     << std::endl;
-  // clang-format on
-}
-
-void helpHelp(std::ostream& os) {
-  // clang-format off
-    os << "=== Help ==="                                     << std::endl <<
-          "  --help, -h                  Print this message" << std::endl;
-  // clang-format on
-}
-
-void AllOptions::help(std::ostream& os) {
-  ModelOptions::help(os);
-  os << std::endl;
-  BuildOptions::help(os);
-  os << std::endl;
-  InferenceOptions::help(os);
-  os << std::endl;
-  // clang-format off
-    os << "=== Build and Inference Batch Options ==="                                                                   << std::endl <<
-          "                              When using implicit batch, the max batch size of the engine, if not given, "   << std::endl <<
-          "                              is set to the inference batch size;"                                           << std::endl <<
-          "                              when using explicit batch, if shapes are specified only for inference, they "  << std::endl <<
-          "                              will be used also as min/opt/max in the build profile; if shapes are "         << std::endl <<
-          "                              specified only for the build, the opt shapes will be used also for inference;" << std::endl <<
-          "                              if both are specified, they must be compatible; and if explicit batch is "     << std::endl <<
-          "                              enabled but neither is specified, the model must provide complete static"      << std::endl <<
-          "                              dimensions, including batch size, for all inputs"                              << std::endl <<
-          "                              Using ONNX models automatically forces explicit batch."                        << std::endl <<
-    std::endl;
-  // clang-format on
-  ReportingOptions::help(os);
-  os << std::endl;
-  SystemOptions::help(os);
-  os << std::endl;
-  helpHelp(os);
-}
-
-void SafeBuilderOptions::printHelp(std::ostream& os) {
-  // clang-format off
-    os << "=== Mandatory ==="                                                                                                                << std::endl <<
-          "  --onnx=<file>               ONNX model"                                                                                         << std::endl <<
-          " "                                                                                                                                << std::endl <<
-          "=== Optional ==="                                                                                                                 << std::endl <<
-          "  --inputIOFormats=spec       Type and format of each of the input tensors (default = all inputs in fp32:chw)"                    << std::endl <<
-          "                              See --outputIOFormats help for the grammar of type and format list."                                << std::endl <<
-          "                              Note: If this option is specified, please set comma-separated types and formats for all"            << std::endl <<
-          "                                    inputs following the same order as network inputs ID (even if only one input"                 << std::endl <<
-          "                                    needs specifying IO format) or set the type and format once for broadcasting."                << std::endl <<
-          "  --outputIOFormats=spec      Type and format of each of the output tensors (default = all outputs in fp32:chw)"                  << std::endl <<
-          "                              Note: If this option is specified, please set comma-separated types and formats for all"            << std::endl <<
-          "                                    outputs following the same order as network outputs ID (even if only one output"              << std::endl <<
-          "                                    needs specifying IO format) or set the type and format once for broadcasting."                << std::endl <<
-          "                              IO Formats: spec  ::= IOfmt[\",\"spec]"                                                             << std::endl <<
-          "                                          IOfmt ::= type:fmt"                                                                     << std::endl <<
-          "                                          type  ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\""                                         << std::endl <<
-          "                                          fmt   ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" << std::endl <<
-          "  --int8                      Enable int8 precision, in addition to fp16 (default = disabled)"                                    << std::endl <<
-          "  --consistency               Enable consistency check for serialized engine, (default = disabled)"                               << std::endl <<
-          "  --std                       Build standard serialized engine, (default = disabled)"                                             << std::endl <<
-          "  --calib=<file>              Read INT8 calibration cache file"                                                                   << std::endl <<
-          "  --serialized=<file>         Save the serialized network"                                                                        << std::endl <<
-          "  --plugins                   Plugin library (.so) to load (can be specified multiple times)"                                     << std::endl <<
-          "  --verbose or -v             Use verbose logging (default = false)"                                                              << std::endl <<
-          "  --help or -h                Print this message"                                                                                 << std::endl <<
-          " "                                                                                                                                << std::endl;
-  // clang-format on
-}
-
-} // namespace sample
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h
deleted file mode 100644
index 99293da10..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TRT_SAMPLE_OPTIONS_H
-#define TRT_SAMPLE_OPTIONS_H
-
-#include <algorithm>
-#include <array>
-#include <iostream>
-#include <stdexcept>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "NvInfer.h"
-
-namespace sample {
-
-// Build default params
-constexpr int32_t maxBatchNotProvided{0};
-constexpr int32_t defaultMinTiming{1};
-constexpr int32_t defaultAvgTiming{8};
-
-// System default params
-constexpr int32_t defaultDevice{0};
-
-// Inference default params
-constexpr int32_t defaultBatch{1};
-constexpr int32_t batchNotProvided{0};
-constexpr int32_t defaultStreams{1};
-constexpr int32_t defaultIterations{10};
-constexpr float defaultWarmUp{200.F};
-constexpr float defaultDuration{3.F};
-constexpr float defaultSleep{};
-constexpr float defaultIdle{};
-
-// Reporting default params
-constexpr int32_t defaultAvgRuns{10};
-constexpr float defaultPercentile{99};
-
-enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
-
-enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };
-
-enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
-
-enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };
-
-using Arguments = std::unordered_multimap<std::string, std::string>;
-
-using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
-
-using ShapeRange =
-    std::array<std::vector<int32_t>,
-               nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
-
-using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
-using LayerOutputTypes =
-    std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
-
-struct Options {
-  virtual void parse(Arguments& arguments) = 0;
-};
-
-struct BaseModelOptions : public Options {
-  ModelFormat format{ModelFormat::kANY};
-  std::string model;
-
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-struct UffInput : public Options {
-  std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
-  bool NHWC{false};
-
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-struct ModelOptions : public Options {
-  BaseModelOptions baseModel;
-  std::string prototxt;
-  std::vector<std::string> outputs;
-  UffInput uffInputs;
-
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-struct BuildOptions : public Options {
-  int32_t maxBatch{maxBatchNotProvided};
-  double workspace{-1.0};
-  double dlaSRAM{-1.0};
-  double dlaLocalDRAM{-1.0};
-  double dlaGlobalDRAM{-1.0};
-  int32_t minTiming{defaultMinTiming};
-  int32_t avgTiming{defaultAvgTiming};
-  bool tf32{true};
-  bool fp16{false};
-  bool int8{false};
-  bool directIO{false};
-  PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
-  LayerPrecisions layerPrecisions;
-  LayerOutputTypes layerOutputTypes;
-  bool safe{false};
-  bool consistency{false};
-  bool restricted{false};
-  bool save{false};
-  bool load{false};
-  bool refittable{false};
-  SparsityFlag sparsity{SparsityFlag::kDISABLE};
-  nvinfer1::ProfilingVerbosity profilingVerbosity{
-      nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
-  std::string engine;
-  std::string calibration;
-  std::unordered_map<std::string, ShapeRange> shapes;
-  std::unordered_map<std::string, ShapeRange> shapesCalib;
-  std::vector<IOFormat> inputFormats;
-  std::vector<IOFormat> outputFormats;
-  nvinfer1::TacticSources enabledTactics{0};
-  nvinfer1::TacticSources disabledTactics{0};
-  TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
-  std::string timingCacheFile{};
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-struct SystemOptions : public Options {
-  int32_t device{defaultDevice};
-  int32_t DLACore{-1};
-  bool fallback{false};
-  std::vector<std::string> plugins;
-
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-struct InferenceOptions : public Options {
-  int32_t batch{batchNotProvided};
-  int32_t iterations{defaultIterations};
-  int32_t streams{defaultStreams};
-  float warmup{defaultWarmUp};
-  float duration{defaultDuration};
-  float sleep{defaultSleep};
-  float idle{defaultIdle};
-  bool overlap{true};
-  bool skipTransfers{false};
-  bool useManaged{false};
-  bool spin{false};
-  bool threads{false};
-  bool graph{false};
-  bool skip{false};
-  bool rerun{false};
-  bool timeDeserialize{false};
-  bool timeRefit{false};
-  std::unordered_map<std::string, std::string> inputs;
-  std::unordered_map<std::string, std::vector<int32_t>> shapes;
-
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-struct ReportingOptions : public Options {
-  bool verbose{false};
-  int32_t avgs{defaultAvgRuns};
-  float percentile{defaultPercentile};
-  bool refit{false};
-  bool output{false};
-  bool profile{false};
-  bool layerInfo{false};
-  std::string exportTimes;
-  std::string exportOutput;
-  std::string exportProfile;
-  std::string exportLayerInfo;
-
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-struct SafeBuilderOptions : public Options {
-  std::string serialized{};
-  std::string onnxModelFile{};
-  bool help{false};
-  bool verbose{false};
-  std::vector<IOFormat> inputFormats;
-  std::vector<IOFormat> outputFormats;
-  bool int8{false};
-  std::string calibFile{};
-  std::vector<std::string> plugins;
-  bool consistency{false};
-  bool standard{false};
-
-  void parse(Arguments& arguments) override;
-
-  static void printHelp(std::ostream& out);
-};
-
-struct AllOptions : public Options {
-  ModelOptions model;
-  BuildOptions build;
-  SystemOptions system;
-  InferenceOptions inference;
-  ReportingOptions reporting;
-  bool helps{false};
-
-  void parse(Arguments& arguments) override;
-
-  static void help(std::ostream& out);
-};
-
-Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
-
-bool parseHelp(Arguments& arguments);
-
-void helpHelp(std::ostream& out);
-
-// Functions to print options
-
-std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
-
-std::ostream& operator<<(std::ostream& os, const UffInput& input);
-
-std::ostream& operator<<(std::ostream& os, const IOFormat& format);
-
-std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
-
-std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
-
-std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
-
-std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
-
-std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
-
-std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
-
-std::ostream& operator<<(std::ostream& os, const AllOptions& options);
-
-std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
-
-inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
-  for (int32_t i = 0; i < dims.nbDims; ++i) {
-    os << (i ? "x" : "") << dims.d[i];
-  }
-  return os;
-}
-inline std::ostream& operator<<(std::ostream& os,
-                                const nvinfer1::WeightsRole role) {
-  switch (role) {
-  case nvinfer1::WeightsRole::kKERNEL: {
-    os << "Kernel";
-    break;
-  }
-  case nvinfer1::WeightsRole::kBIAS: {
-    os << "Bias";
-    break;
-  }
-  case nvinfer1::WeightsRole::kSHIFT: {
-    os << "Shift";
-    break;
-  }
-  case nvinfer1::WeightsRole::kSCALE: {
-    os << "Scale";
-    break;
-  }
-  case nvinfer1::WeightsRole::kCONSTANT: {
-    os << "Constant";
-    break;
-  }
-  case nvinfer1::WeightsRole::kANY: {
-    os << "Any";
-    break;
-  }
-  }
-
-  return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os,
-                                const std::vector<int32_t>& vec) {
-  for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i) {
-    os << (i ? "x" : "") << vec[i];
-  }
-  return os;
-}
-
-} // namespace sample
-
-#endif // TRT_SAMPLES_OPTIONS_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp
deleted file mode 100644
index 5e8e8619b..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <exception>
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <numeric>
-#include <utility>
-
-#include "sampleInference.h"
-#include "sampleOptions.h"
-#include "sampleReporting.h"
-
-using namespace nvinfer1;
-
-namespace sample {
-
-namespace {
-
-//!
-//! \brief Find percentile in an ascending sequence of timings
-//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
-//!
-template <typename T>
-float findPercentile(float percentile,
-                     std::vector<InferenceTime> const& timings,
-                     T const& toFloat) {
-  int32_t const all = static_cast<int32_t>(timings.size());
-  int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
-  if (timings.empty()) {
-    return std::numeric_limits<float>::infinity();
-  }
-  if (percentile < 0.0f || percentile > 100.0f) {
-    throw std::runtime_error("percentile is not in [0, 100]!");
-  }
-  return toFloat(timings[std::max(all - 1 - exclude, 0)]);
-}
-
-//!
-//! \brief Find median in a sorted sequence of timings
-//!
-template <typename T>
-float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat) {
-  if (timings.empty()) {
-    return std::numeric_limits<float>::infinity();
-  }
-
-  int32_t const m = timings.size() / 2;
-  if (timings.size() % 2) {
-    return toFloat(timings[m]);
-  }
-
-  return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
-}
-
-//!
-//! \brief Find coefficient of variance (which is std / mean) in a sorted
-//! sequence of timings given the mean
-//!
-template <typename T>
-float findCoeffOfVariance(std::vector<InferenceTime> const& timings,
-                          T const& toFloat, float mean) {
-  if (timings.empty()) {
-    return 0;
-  }
-
-  if (mean == 0.F) {
-    return std::numeric_limits<float>::infinity();
-  }
-
-  auto const metricAccumulator = [toFloat, mean](float acc,
-                                                 InferenceTime const& a) {
-    float const diff = toFloat(a) - mean;
-    return acc + diff * diff;
-  };
-  float const variance =
-      std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) /
-      timings.size();
-
-  return std::sqrt(variance) / mean * 100.F;
-}
-
-inline InferenceTime traceToTiming(const InferenceTrace& a) {
-  return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart),
-                       (a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart),
-                       (a.d2hEnd - a.h2dStart));
-}
-
-} // namespace
-
-void printProlog(int32_t warmups, int32_t timings, float warmupMs,
-                 float benchTimeMs, std::ostream& os) {
-  os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms"
-     << std::endl;
-  os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000
-     << " s" << std::endl;
-}
-
-void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
-                 std::ostream& os) {
-  int32_t count = 0;
-  InferenceTime sum;
-
-  os << std::endl;
-  os << "=== Trace details ===" << std::endl;
-  os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
-  for (auto const& t : timings) {
-    sum += t;
-
-    if (++count == runsPerAvg) {
-      // clang-format off
-            os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
-               << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg
-               << " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl;
-      // clang-format on
-      count = 0;
-      sum.enq = 0;
-      sum.h2d = 0;
-      sum.compute = 0;
-      sum.d2h = 0;
-      sum.e2e = 0;
-    }
-  }
-}
-
-void printMetricExplanations(std::ostream& os) {
-  os << std::endl;
-  os << "=== Explanations of the performance metrics ===" << std::endl;
-  os << "Total Host Walltime: the host walltime from when the first query "
-        "(after warmups) is enqueued to when the "
-        "last query is completed."
-     << std::endl;
-  os << "GPU Compute Time: the GPU latency to execute the kernels for a query."
-     << std::endl;
-  os << "Total GPU Compute Time: the summation of the GPU Compute Time of all "
-        "the queries. If this is significantly "
-        "shorter than Total Host Walltime, the GPU may be under-utilized "
-        "because of host-side overheads or data "
-        "transfers."
-     << std::endl;
-  os << "Throughput: the observed throughput computed by dividing the number "
-        "of queries by the Total Host Walltime. "
-        "If this is significantly lower than the reciprocal of GPU Compute "
-        "Time, the GPU may be under-utilized "
-        "because of host-side overheads or data transfers."
-     << std::endl;
-  os << "Enqueue Time: the host latency to enqueue a query. If this is longer "
-        "than GPU Compute Time, the GPU may be "
-        "under-utilized."
-     << std::endl;
-  os << "H2D Latency: the latency for host-to-device data transfers for input "
-        "tensors of a single query."
-     << std::endl;
-  os << "D2H Latency: the latency for device-to-host data transfers for output "
-        "tensors of a single query."
-     << std::endl;
-  os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H "
-        "Latency. This is the latency to infer a "
-        "single query."
-     << std::endl;
-  os << "End-to-End Host Latency: the duration from when the H2D of a query is "
-        "called to when the D2H of the same "
-        "query is completed, which includes the latency to wait for the "
-        "completion of the previous query. This is "
-        "the latency of a query if multiple queries are enqueued consecutively."
-     << std::endl;
-}
-
-PerformanceResult
-getPerformanceResult(std::vector<InferenceTime> const& timings,
-                     std::function<float(InferenceTime const&)> metricGetter,
-                     float percentile) {
-  auto const metricComparator = [metricGetter](InferenceTime const& a,
-                                               InferenceTime const& b) {
-    return metricGetter(a) < metricGetter(b);
-  };
-  auto const metricAccumulator = [metricGetter](float acc,
-                                                InferenceTime const& a) {
-    return acc + metricGetter(a);
-  };
-  std::vector<InferenceTime> newTimings = timings;
-  std::sort(newTimings.begin(), newTimings.end(), metricComparator);
-  PerformanceResult result;
-  result.min = metricGetter(newTimings.front());
-  result.max = metricGetter(newTimings.back());
-  result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f,
-                                metricAccumulator) /
-                newTimings.size();
-  result.median = findMedian(newTimings, metricGetter);
-  result.percentile = findPercentile(percentile, newTimings, metricGetter);
-  result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
-  return result;
-}
-
-void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
-                 float percentile, int32_t batchSize, std::ostream& osInfo,
-                 std::ostream& osWarning, std::ostream& osVerbose) {
-  float const throughput = batchSize * timings.size() / walltimeMs * 1000;
-
-  auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
-  auto const latencyResult =
-      getPerformanceResult(timings, getLatency, percentile);
-
-  auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; };
-  auto const e2eLatencyResult =
-      getPerformanceResult(timings, getEndToEnd, percentile);
-
-  auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
-  auto const enqueueResult =
-      getPerformanceResult(timings, getEnqueue, percentile);
-
-  auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
-  auto const h2dResult = getPerformanceResult(timings, getH2d, percentile);
-
-  auto const getCompute = [](InferenceTime const& t) { return t.compute; };
-  auto const gpuComputeResult =
-      getPerformanceResult(timings, getCompute, percentile);
-
-  auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
-  auto const d2hResult = getPerformanceResult(timings, getD2h, percentile);
-
-  auto const toPerfString = [percentile](const PerformanceResult& r) {
-    std::stringstream s;
-    s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean
-      << " ms, "
-      << "median = " << r.median << " ms, percentile(" << percentile
-      << "%) = " << r.percentile << " ms";
-    return s.str();
-  };
-
-  osInfo << std::endl;
-  osInfo << "=== Performance summary ===" << std::endl;
-  osInfo << "Throughput: " << throughput << " qps" << std::endl;
-  osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
-  osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult)
-         << std::endl;
-  osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
-  osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
-  osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
-  osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
-  osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
-  osInfo << "Total GPU Compute Time: "
-         << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
-
-  // Report warnings if the throughput is bound by other factors than GPU
-  // Compute Time.
-  constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
-  if (enqueueResult.median >
-      kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) {
-    osWarning << "* Throughput may be bound by Enqueue Time rather than GPU "
-                 "Compute and the GPU may be under-utilized."
-              << std::endl;
-    osWarning << "  If not already in use, --useCudaGraph (utilize CUDA graphs "
-                 "where possible) may increase the "
-                 "throughput."
-              << std::endl;
-  }
-  if (h2dResult.median >= gpuComputeResult.median) {
-    osWarning << "* Throughput may be bound by host-to-device transfers for "
-                 "the inputs rather than GPU Compute and "
-                 "the GPU may be under-utilized."
-              << std::endl;
-    osWarning << "  Add --noDataTransfers flag to disable data transfers."
-              << std::endl;
-  }
-  if (d2hResult.median >= gpuComputeResult.median) {
-    osWarning << "* Throughput may be bound by device-to-host transfers for "
-                 "the outputs rather than GPU Compute "
-                 "and the GPU may be under-utilized."
-              << std::endl;
-    osWarning << "  Add --noDataTransfers flag to disable data transfers."
-              << std::endl;
-  }
-
-  // Report warnings if the GPU Compute Time is unstable.
-  constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
-  if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) {
-    osWarning
-        << "* GPU compute time is unstable, with coefficient of variance = "
-        << gpuComputeResult.coeffVar << "%." << std::endl;
-    osWarning << "  If not already in use, locking GPU clock frequency or "
-                 "adding --useSpinWait may improve the "
-              << "stability." << std::endl;
-  }
-
-  // Explain what the metrics mean.
-  osInfo << "Explanations of the performance metrics are printed in the "
-            "verbose logs."
-         << std::endl;
-  printMetricExplanations(osVerbose);
-
-  osInfo << std::endl;
-}
-
-void printPerformanceReport(std::vector<InferenceTrace> const& trace,
-                            const ReportingOptions& reporting, float warmupMs,
-                            int32_t batchSize, std::ostream& osInfo,
-                            std::ostream& osWarning, std::ostream& osVerbose) {
-  auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) {
-    return a.computeStart >= warmupMs;
-  };
-  auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
-  int32_t const warmups = noWarmup - trace.begin();
-  float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
-  // when implicit batch used, batchSize = options.inference.batch, which is
-  // parsed through --batch
-  // when explicit batch used, batchSize = options.inference.batch = 0
-  // treat inference with explicit batch as a single query and report the
-  // throughput
-  batchSize = batchSize ? batchSize : 1;
-  printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize,
-              warmupMs, benchTime, osInfo);
-
-  std::vector<InferenceTime> timings(trace.size() - warmups);
-  std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
-  printTiming(timings, reporting.avgs, osInfo);
-  printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo,
-              osWarning, osVerbose);
-
-  if (!reporting.exportTimes.empty()) {
-    exportJSONTrace(trace, reporting.exportTimes);
-  }
-}
-
-//! Printed format:
-//! [ value, ...]
-//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end
-//! h2d" : time, "start compute" : time,
-//!             "end compute" : time, "start d2h" : time, "end d2h" : time,
-//!             "h2d" : time, "compute" : time,
-//!             "d2h" : time, "latency" : time, "end to end" : time }
-//!
-void exportJSONTrace(std::vector<InferenceTrace> const& trace,
-                     std::string const& fileName) {
-  std::ofstream os(fileName, std::ofstream::trunc);
-  os << "[" << std::endl;
-  char const* sep = "  ";
-  for (auto const& t : trace) {
-    InferenceTime const it(traceToTiming(t));
-    os << sep << "{ ";
-    sep = ", ";
-    // clang-format off
-        os << "\"startEnqMs\" : "     << t.enqStart     << sep << "\"endEnqMs\" : "     << t.enqEnd     << sep
-           << "\"startH2dMs\" : "     << t.h2dStart     << sep << "\"endH2dMs\" : "     << t.h2dEnd     << sep
-           << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
-           << "\"startD2hMs\" : "     << t.d2hStart     << sep << "\"endD2hMs\" : "     << t.d2hEnd     << sep
-           << "\"h2dMs\" : "          << it.h2d         << sep << "\"computeMs\" : "    << it.compute   << sep
-           << "\"d2hMs\" : "          << it.d2h         << sep << "\"latencyMs\" : "    << it.latency() << sep
-           << "\"endToEndMs\" : "     << it.e2e         << " }"                                         << std::endl;
-    // clang-format on
-  }
-  os << "]" << std::endl;
-}
-
-void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept {
-  if (mIterator == mLayers.end()) {
-    bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
-    mUpdatesCount += mLayers.empty() || first;
-    if (first) {
-      mIterator = mLayers.begin();
-    } else {
-      mLayers.emplace_back();
-      mLayers.back().name = layerName;
-      mIterator = mLayers.end() - 1;
-    }
-  }
-
-  mIterator->timeMs += timeMs;
-  ++mIterator;
-}
-
-void Profiler::print(std::ostream& os) const noexcept {
-  std::string const nameHdr("Layer");
-  std::string const timeHdr("   Time (ms)");
-  std::string const avgHdr("   Avg. Time (ms)");
-  std::string const percentageHdr("   Time %");
-
-  float const totalTimeMs = getTotalTime();
-
-  auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) {
-    return a.name.size() < b.name.size();
-  };
-  auto const longestName =
-      std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
-  auto const nameLength =
-      std::max(longestName->name.size() + 1, nameHdr.size());
-  auto const timeLength = timeHdr.size();
-  auto const avgLength = avgHdr.size();
-  auto const percentageLength = percentageHdr.size();
-
-  os << std::endl
-     << "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
-     << std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr
-     << std::endl;
-
-  for (auto const& p : mLayers) {
-    // clang-format off
-        os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs
-           << std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount
-           << std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100
-           << std::endl;
-    }
-    {
-        os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
-           << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
-           << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
-    // clang-format on
-  }
-  os << std::endl;
-}
-
-void Profiler::exportJSONProfile(std::string const& fileName) const noexcept {
-  std::ofstream os(fileName, std::ofstream::trunc);
-  os << "[" << std::endl
-     << "  { \"count\" : " << mUpdatesCount << " }" << std::endl;
-
-  auto const totalTimeMs = getTotalTime();
-
-  for (auto const& l : mLayers) {
-    // clang-format off
-        os << ", {" << " \"name\" : \""      << l.name << "\""
-                       ", \"timeMs\" : "     << l.timeMs
-           <<          ", \"averageMs\" : "  << l.timeMs / mUpdatesCount
-           <<          ", \"percentage\" : " << l.timeMs / totalTimeMs * 100
-           << " }"  << std::endl;
-    // clang-format on
-  }
-  os << "]" << std::endl;
-}
-
-void dumpInputs(nvinfer1::IExecutionContext const& context,
-                Bindings const& bindings, std::ostream& os) {
-  os << "Input Tensors:" << std::endl;
-  bindings.dumpInputs(context, os);
-}
-
-void dumpOutputs(nvinfer1::IExecutionContext const& context,
-                 Bindings const& bindings, std::ostream& os) {
-  os << "Output Tensors:" << std::endl;
-  bindings.dumpOutputs(context, os);
-}
-
-void exportJSONOutput(nvinfer1::IExecutionContext const& context,
-                      Bindings const& bindings, std::string const& fileName,
-                      int32_t batch) {
-  std::ofstream os(fileName, std::ofstream::trunc);
-  std::string sep = "  ";
-  auto const output = bindings.getOutputBindings();
-  os << "[" << std::endl;
-  for (auto const& binding : output) {
-    // clang-format off
-        os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl;
-        sep = ", ";
-        os << "  " << sep << "\"dimensions\" : \"";
-        bindings.dumpBindingDimensions(binding.second, context, os);
-        os << "\"" << std::endl;
-        os << "  " << sep << "\"values\" : [ ";
-        bindings.dumpBindingValues(context, binding.second, os, sep, batch);
-        os << " ]" << std::endl << "  }" << std::endl;
-    // clang-format on
-  }
-  os << "]" << std::endl;
-}
-
-} // namespace sample
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h
deleted file mode 100644
index 68b78af9c..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TRT_SAMPLE_REPORTING_H
-#define TRT_SAMPLE_REPORTING_H
-
-#include <functional>
-#include <iostream>
-
-#include "NvInfer.h"
-
-#include "sampleOptions.h"
-#include "sampleUtils.h"
-
-namespace sample {
-
-//!
-//! \struct InferenceTime
-//! \brief Measurement times in milliseconds
-//!
-struct InferenceTime {
-  InferenceTime(float q, float i, float c, float o, float e)
-      : enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}
-
-  InferenceTime() = default;
-  InferenceTime(InferenceTime const&) = default;
-  InferenceTime(InferenceTime&&) = default;
-  InferenceTime& operator=(InferenceTime const&) = default;
-  InferenceTime& operator=(InferenceTime&&) = default;
-  ~InferenceTime() = default;
-
-  float enq{0};     // Enqueue
-  float h2d{0};     // Host to Device
-  float compute{0}; // Compute
-  float d2h{0};     // Device to Host
-  float e2e{0};     // end to end
-
-  // ideal latency
-  float latency() const { return h2d + compute + d2h; }
-};
-
-//!
-//! \struct InferenceTrace
-//! \brief Measurement points in milliseconds
-//!
-struct InferenceTrace {
-  InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
-                 float ce, float os, float oe)
-      : stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
-        computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}
-
-  InferenceTrace() = default;
-  InferenceTrace(InferenceTrace const&) = default;
-  InferenceTrace(InferenceTrace&&) = default;
-  InferenceTrace& operator=(InferenceTrace const&) = default;
-  InferenceTrace& operator=(InferenceTrace&&) = default;
-  ~InferenceTrace() = default;
-
-  int32_t stream{0};
-  float enqStart{0};
-  float enqEnd{0};
-  float h2dStart{0};
-  float h2dEnd{0};
-  float computeStart{0};
-  float computeEnd{0};
-  float d2hStart{0};
-  float d2hEnd{0};
-};
-
-inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
-  return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
-                       a.d2h + b.d2h, a.e2e + b.e2e);
-}
-
-inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
-  return a = a + b;
-}
-
-//!
-//! \struct PerformanceResult
-//! \brief Performance result of a performance metric
-//!
-struct PerformanceResult {
-  float min{0};
-  float max{0};
-  float mean{0};
-  float median{0};
-  float percentile{0};
-  float coeffVar{0}; // coefficient of variation
-};
-
-//!
-//! \brief Print benchmarking time and number of traces collected
-//!
-void printProlog(int32_t warmups, int32_t timings, float warmupMs,
-                 float walltime, std::ostream& os);
-
-//!
-//! \brief Print a timing trace
-//!
-void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
-                 std::ostream& os);
-
-//!
-//! \brief Print the performance summary of a trace
-//!
-void printEpilog(std::vector<InferenceTime> const& timings, float percentile,
-                 int32_t batchSize, std::ostream& osInfo,
-                 std::ostream& osWarning, std::ostream& osVerbose);
-
-//!
-//! \brief Get the result of a specific performance metric from a trace
-//!
-PerformanceResult
-getPerformanceResult(std::vector<InferenceTime> const& timings,
-                     std::function<float(InferenceTime const&)> metricGetter,
-                     float percentile);
-
-//!
-//! \brief Print the explanations of the performance metrics printed in
-//! printEpilog() function.
-//!
-void printMetricExplanations(std::ostream& os);
-
-//!
-//! \brief Print and summarize a timing trace
-//!
-void printPerformanceReport(std::vector<InferenceTrace> const& trace,
-                            ReportingOptions const& reporting, float warmupMs,
-                            int32_t batchSize, std::ostream& osInfo,
-                            std::ostream& osWarning, std::ostream& osVerbose);
-
-//!
-//! \brief Export a timing trace to JSON file
-//!
-void exportJSONTrace(std::vector<InferenceTrace> const& trace,
-                     std::string const& fileName);
-
-//!
-//! \brief Print input tensors to stream
-//!
-void dumpInputs(nvinfer1::IExecutionContext const& context,
-                Bindings const& bindings, std::ostream& os);
-
-//!
-//! \brief Print output tensors to stream
-//!
-void dumpOutputs(nvinfer1::IExecutionContext const& context,
-                 Bindings const& bindings, std::ostream& os);
-
-//!
-//! \brief Export output tensors to JSON file
-//!
-void exportJSONOutput(nvinfer1::IExecutionContext const& context,
-                      Bindings const& bindings, std::string const& fileName,
-                      int32_t batch);
-
-//!
-//! \struct LayerProfile
-//! \brief Layer profile information
-//!
-struct LayerProfile {
-  std::string name;
-  float timeMs{0};
-};
-
-//!
-//! \class Profiler
-//! \brief Collect per-layer profile information, assuming times are reported in
-//! the same order
-//!
-class Profiler : public nvinfer1::IProfiler {
- public:
-  void reportLayerTime(char const* layerName, float timeMs) noexcept override;
-
-  void print(std::ostream& os) const noexcept;
-
-  //!
-  //! \brief Export a profile to JSON file
-  //!
-  void exportJSONProfile(std::string const& fileName) const noexcept;
-
- private:
-  float getTotalTime() const noexcept {
-    auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
-      return accumulator + lp.timeMs;
-    };
-    return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime);
-  }
-
-  std::vector<LayerProfile> mLayers;
-  std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
-  int32_t mUpdatesCount{0};
-};
-
-} // namespace sample
-
-#endif // TRT_SAMPLE_REPORTING_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h
deleted file mode 100644
index 2c6f415bc..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TRT_SAMPLE_UTILS_H
-#define TRT_SAMPLE_UTILS_H
-
-#include <fstream>
-#include <iostream>
-#include <memory>
-#include <numeric>
-#include <random>
-#include <unordered_map>
-#include <vector>
-
-#include <cuda.h>
-#include <cuda_fp16.h>
-
-#include "NvInfer.h"
-
-#include "common.h"
-#include "logger.h"
-#include "sampleDevice.h"
-#include "sampleOptions.h"
-
-namespace sample {
-
-inline int dataTypeSize(nvinfer1::DataType dataType) {
-  switch (dataType) {
-  case nvinfer1::DataType::kINT32:
-  case nvinfer1::DataType::kFLOAT:
-    return 4;
-  case nvinfer1::DataType::kHALF:
-    return 2;
-  case nvinfer1::DataType::kBOOL:
-  case nvinfer1::DataType::kINT8:
-    return 1;
-  }
-  return 0;
-}
-
-template <typename T> inline T roundUp(T m, T n) {
-  return ((m + n - 1) / n) * n;
-}
-
-inline int volume(const nvinfer1::Dims& d) {
-  return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
-}
-
-//! comps is the number of components in a vector. Ignored if vecDim < 0.
-inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
-                      int vecDim, int comps, int batch) {
-  int maxNbElems = 1;
-  for (int i = 0; i < dims.nbDims; ++i) {
-    // Get effective length of axis.
-    int d = dims.d[i];
-    // Any dimension is 0, it is an empty tensor.
-    if (d == 0) {
-      return 0;
-    }
-    if (i == vecDim) {
-      d = samplesCommon::divUp(d, comps);
-    }
-    maxNbElems = std::max(maxNbElems, d * strides.d[i]);
-  }
-  return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
-}
-
-inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
-  if (vecDim != -1) {
-    dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
-  }
-  return volume(dims) * std::max(batch, 1);
-}
-
-inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
-  int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
-  if (static_cast<int>(vec.size()) > limit) {
-    sample::gLogWarning
-        << "Vector too long, only first 8 elements are used in dimension."
-        << std::endl;
-  }
-  // Pick first nvinfer1::Dims::MAX_DIMS elements
-  nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
-  std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
-  return dims;
-}
-
-template <typename T>
-inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
-  T* typedBuffer = static_cast<T*>(buffer);
-  std::default_random_engine engine;
-  if (std::is_integral<T>::value) {
-    std::uniform_int_distribution<int> distribution(min, max);
-    auto generator = [&engine, &distribution]() {
-      return static_cast<T>(distribution(engine));
-    };
-    std::generate(typedBuffer, typedBuffer + volume, generator);
-  } else {
-    std::uniform_real_distribution<float> distribution(min, max);
-    auto generator = [&engine, &distribution]() {
-      return static_cast<T>(distribution(engine));
-    };
-    std::generate(typedBuffer, typedBuffer + volume, generator);
-  }
-}
-
-// Specialization needed for custom type __half
-template <typename H>
-inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
-  H* typedBuffer = static_cast<H*>(buffer);
-  std::default_random_engine engine;
-  std::uniform_real_distribution<float> distribution(min, max);
-  auto generator = [&engine, &distribution]() {
-    return static_cast<H>(distribution(engine));
-  };
-  std::generate(typedBuffer, typedBuffer + volume, generator);
-}
-template <>
-inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
-                               __half max) {
-  fillBufferHalf(buffer, volume, min, max);
-}
-
-template <typename T>
-inline void dumpBuffer(const void* buffer, const std::string& separator,
-                       std::ostream& os, const Dims& dims, const Dims& strides,
-                       int32_t vectorDim, int32_t spv) {
-  const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1,
-                                         std::multiplies<int64_t>());
-  const T* typedBuffer = static_cast<const T*>(buffer);
-  std::string sep;
-  for (int64_t v = 0; v < volume; ++v) {
-    int64_t curV = v;
-    int32_t dataOffset = 0;
-    for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
-      int32_t dimVal = curV % dims.d[dimIndex];
-      if (dimIndex == vectorDim) {
-        dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
-      } else {
-        dataOffset +=
-            dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
-      }
-      curV /= dims.d[dimIndex];
-      ASSERT(curV >= 0);
-    }
-
-    os << sep << typedBuffer[dataOffset];
-    sep = separator;
-  }
-}
-
-inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
-  ASSERT(dst);
-
-  std::ifstream file(fileName, std::ios::in | std::ios::binary);
-  if (file.is_open()) {
-    file.read(dst, size);
-    file.close();
-  } else {
-    std::stringstream msg;
-    msg << "Cannot open file " << fileName << "!";
-    throw std::invalid_argument(msg.str());
-  }
-}
-
-struct Binding {
-  bool isInput{false};
-  std::unique_ptr<IMirroredBuffer> buffer;
-  int64_t volume{0};
-  nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
-
-  void fill(const std::string& fileName) {
-    loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
-                 buffer->getSize());
-  }
-
-  void fill() {
-    switch (dataType) {
-    case nvinfer1::DataType::kBOOL: {
-      fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
-      break;
-    }
-    case nvinfer1::DataType::kINT32: {
-      fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
-      break;
-    }
-    case nvinfer1::DataType::kINT8: {
-      fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
-      break;
-    }
-    case nvinfer1::DataType::kFLOAT: {
-      fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
-      break;
-    }
-    case nvinfer1::DataType::kHALF: {
-      fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
-      break;
-    }
-    }
-  }
-
-  void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
-            int32_t spv, const std::string separator = " ") const {
-    switch (dataType) {
-    case nvinfer1::DataType::kBOOL: {
-      dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
-                       vectorDim, spv);
-      break;
-    }
-    case nvinfer1::DataType::kINT32: {
-      dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
-                          vectorDim, spv);
-      break;
-    }
-    case nvinfer1::DataType::kINT8: {
-      dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
-                         vectorDim, spv);
-      break;
-    }
-    case nvinfer1::DataType::kFLOAT: {
-      dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
-                        vectorDim, spv);
-      break;
-    }
-    case nvinfer1::DataType::kHALF: {
-      dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
-                         vectorDim, spv);
-      break;
-    }
-    }
-  }
-};
-
-class Bindings {
- public:
-  Bindings() = delete;
-  explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
-
-  void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
-                  nvinfer1::DataType dataType,
-                  const std::string& fileName = "") {
-    while (mBindings.size() <= static_cast<size_t>(b)) {
-      mBindings.emplace_back();
-      mDevicePointers.emplace_back();
-    }
-    mNames[name] = b;
-    if (mBindings[b].buffer == nullptr) {
-      if (mUseManaged) {
-        mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
-      } else {
-        mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
-      }
-    }
-    mBindings[b].isInput = isInput;
-    // Some memory allocators return nullptr when allocating zero bytes, but
-    // TensorRT requires a non-null ptr
-    // even for empty tensors, so allocate a dummy byte.
-    if (volume == 0) {
-      mBindings[b].buffer->allocate(1);
-    } else {
-      mBindings[b].buffer->allocate(
-          static_cast<size_t>(volume) *
-          static_cast<size_t>(dataTypeSize(dataType)));
-    }
-    mBindings[b].volume = volume;
-    mBindings[b].dataType = dataType;
-    mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
-    if (isInput) {
-      if (fileName.empty()) {
-        fill(b);
-      } else {
-        fill(b, fileName);
-      }
-    }
-  }
-
-  void** getDeviceBuffers() { return mDevicePointers.data(); }
-
-  void transferInputToDevice(TrtCudaStream& stream) {
-    for (auto& b : mNames) {
-      if (mBindings[b.second].isInput) {
-        mBindings[b.second].buffer->hostToDevice(stream);
-      }
-    }
-  }
-
-  void transferOutputToHost(TrtCudaStream& stream) {
-    for (auto& b : mNames) {
-      if (!mBindings[b.second].isInput) {
-        mBindings[b.second].buffer->deviceToHost(stream);
-      }
-    }
-  }
-
-  void fill(int binding, const std::string& fileName) {
-    mBindings[binding].fill(fileName);
-  }
-
-  void fill(int binding) { mBindings[binding].fill(); }
-
-  void dumpBindingDimensions(int binding,
-                             const nvinfer1::IExecutionContext& context,
-                             std::ostream& os) const {
-    const auto dims = context.getBindingDimensions(binding);
-    // Do not add a newline terminator, because the caller may be outputting a
-    // JSON string.
-    os << dims;
-  }
-
-  void dumpBindingValues(const nvinfer1::IExecutionContext& context,
-                         int binding, std::ostream& os,
-                         const std::string& separator = " ",
-                         int32_t batch = 1) const {
-    Dims dims = context.getBindingDimensions(binding);
-    Dims strides = context.getStrides(binding);
-    int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
-    const int32_t spv =
-        context.getEngine().getBindingComponentsPerElement(binding);
-
-    if (context.getEngine().hasImplicitBatchDimension()) {
-      auto insertN = [](Dims& d, int32_t bs) {
-        const int32_t nbDims = d.nbDims;
-        ASSERT(nbDims < Dims::MAX_DIMS);
-        std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
-        d.d[0] = bs;
-        d.nbDims = nbDims + 1;
-      };
-      int32_t batchStride = 0;
-      for (int32_t i = 0; i < strides.nbDims; ++i) {
-        if (strides.d[i] * dims.d[i] > batchStride) {
-          batchStride = strides.d[i] * dims.d[i];
-        }
-      }
-      insertN(dims, batch);
-      insertN(strides, batchStride);
-      vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
-    }
-
-    mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
-  }
-
-  void dumpInputs(const nvinfer1::IExecutionContext& context,
-                  std::ostream& os) const {
-    auto isInput = [](const Binding& b) { return b.isInput; };
-    dumpBindings(context, isInput, os);
-  }
-
-  void dumpOutputs(const nvinfer1::IExecutionContext& context,
-                   std::ostream& os) const {
-    auto isOutput = [](const Binding& b) { return !b.isInput; };
-    dumpBindings(context, isOutput, os);
-  }
-
-  void dumpBindings(const nvinfer1::IExecutionContext& context,
-                    std::ostream& os) const {
-    auto all = [](const Binding& b) { return true; };
-    dumpBindings(context, all, os);
-  }
-
-  void dumpBindings(const nvinfer1::IExecutionContext& context,
-                    bool (*predicate)(const Binding& b),
-                    std::ostream& os) const {
-    for (const auto& n : mNames) {
-      const auto binding = n.second;
-      if (predicate(mBindings[binding])) {
-        os << n.first << ": (";
-        dumpBindingDimensions(binding, context, os);
-        os << ")" << std::endl;
-
-        dumpBindingValues(context, binding, os);
-        os << std::endl;
-      }
-    }
-  }
-
-  std::unordered_map<std::string, int> getInputBindings() const {
-    auto isInput = [](const Binding& b) { return b.isInput; };
-    return getBindings(isInput);
-  }
-
-  std::unordered_map<std::string, int> getOutputBindings() const {
-    auto isOutput = [](const Binding& b) { return !b.isInput; };
-    return getBindings(isOutput);
-  }
-
-  std::unordered_map<std::string, int> getBindings() const {
-    auto all = [](const Binding& b) { return true; };
-    return getBindings(all);
-  }
-
-  std::unordered_map<std::string, int>
-  getBindings(bool (*predicate)(const Binding& b)) const {
-    std::unordered_map<std::string, int> bindings;
-    for (const auto& n : mNames) {
-      const auto binding = n.second;
-      if (predicate(mBindings[binding])) {
-        bindings.insert(n);
-      }
-    }
-    return bindings;
-  }
-
- private:
-  std::unordered_map<std::string, int32_t> mNames;
-  std::vector<Binding> mBindings;
-  std::vector<void*> mDevicePointers;
-  bool mUseManaged{false};
-};
-
-template <typename T> struct TrtDestroyer {
-  void operator()(T* t) { t->destroy(); }
-};
-
-template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
-
-inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
-                               size_t nbBindings, bool isInput = true) {
-  bool broadcast = formats.size() == 1;
-  bool validFormatsCount = broadcast || (formats.size() == nbBindings);
-  if (!formats.empty() && !validFormatsCount) {
-    if (isInput) {
-      throw std::invalid_argument(
-          "The number of inputIOFormats must match network's inputs or be one "
-          "for broadcasting.");
-    } else {
-      throw std::invalid_argument(
-          "The number of outputIOFormats must match network's outputs or be "
-          "one for broadcasting.");
-    }
-  }
-  return broadcast;
-}
-
-inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
-  std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
-  if (!iFile) {
-    sample::gLogWarning << "Could not read timing cache from: " << inFileName
-                        << ". A new timing cache will be generated and written."
-                        << std::endl;
-    return std::vector<char>();
-  }
-  iFile.seekg(0, std::ifstream::end);
-  size_t fsize = iFile.tellg();
-  iFile.seekg(0, std::ifstream::beg);
-  std::vector<char> content(fsize);
-  iFile.read(content.data(), fsize);
-  iFile.close();
-  sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
-                   << inFileName << std::endl;
-  return content;
-}
-
-inline void saveTimingCacheFile(const std::string outFileName,
-                                const IHostMemory* blob) {
-  std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
-  if (!oFile) {
-    sample::gLogWarning << "Could not write timing cache to: " << outFileName
-                        << std::endl;
-    return;
-  }
-  oFile.write((char*)blob->data(), blob->size());
-  oFile.close();
-  sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
-                   << outFileName << std::endl;
-}
-
-inline int32_t getCudaDriverVersion() {
-  int32_t version{-1};
-  cudaCheck(cudaDriverGetVersion(&version));
-  return version;
-}
-
-inline int32_t getCudaRuntimeVersion() {
-  int32_t version{-1};
-  cudaCheck(cudaRuntimeGetVersion(&version));
-  return version;
-}
-
-} // namespace sample
-
-#endif // TRT_SAMPLE_UTILS_H
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c b/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c
deleted file mode 100644
index 515a55bb1..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c
+++ /dev/null
@@ -1,568 +0,0 @@
-/*	$OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $	*/
-/*	$NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $	*/
-
-/*
- * Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Sponsored in part by the Defense Advanced Research Projects
- * Agency (DARPA) and Air Force Research Laboratory, Air Force
- * Materiel Command, USAF, under agreement number F39502-99-1-0512.
- */
-/*-
- * Copyright (c) 2000 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Dieter Baron and Thomas Klausner.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <getopt.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <windows.h>
-
-#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
-
-#ifdef REPLACE_GETOPT
-int opterr = 1;   /* if error message should be printed */
-int optind = 1;   /* index into parent argv vector */
-int optopt = '?'; /* character checked for validity */
-#undef optreset   /* see getopt.h */
-#define optreset __mingw_optreset
-int optreset; /* reset getopt */
-char* optarg; /* argument associated with option */
-#endif
-
-#define PRINT_ERROR ((opterr) && (*options != ':'))
-
-#define FLAG_PERMUTE 0x01  /* permute non-options to the end of argv */
-#define FLAG_ALLARGS 0x02  /* treat non-options as args to option "-1" */
-#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
-
-/* return values */
-#define BADCH (int) '?'
-#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
-#define INORDER (int) 1
-
-#ifndef __CYGWIN__
-#define __progname __argv[0]
-#else
-extern char __declspec(dllimport) * __progname;
-#endif
-
-#ifdef __CYGWIN__
-static char EMSG[] = "";
-#else
-#define EMSG ""
-#endif
-
-static int getopt_internal(int, char* const*, const char*, const struct option*, int*, int);
-static int parse_long_options(char* const*, const char*, const struct option*, int*, int);
-static int gcd(int, int);
-static void permute_args(int, int, int, char* const*);
-
-static char* place = EMSG; /* option letter processing */
-
-/* XXX: set optreset to 1 rather than these two */
-static int nonopt_start = -1; /* first non option argument (for permute) */
-static int nonopt_end = -1;   /* first option after non options (for permute) */
-
-/* Error messages */
-static const char recargchar[] = "option requires an argument -- %c";
-static const char recargstring[] = "option requires an argument -- %s";
-static const char ambig[] = "ambiguous option -- %.*s";
-static const char noarg[] = "option doesn't take an argument -- %.*s";
-static const char illoptchar[] = "unknown option -- %c";
-static const char illoptstring[] = "unknown option -- %s";
-
-static void _vwarnx(const char* fmt, va_list ap)
-{
-    (void) fprintf(stderr, "%s: ", __progname);
-    if (fmt != NULL)
-        (void) vfprintf(stderr, fmt, ap);
-    (void) fprintf(stderr, "\n");
-}
-
-static void warnx(const char* fmt, ...)
-{
-    va_list ap;
-    va_start(ap, fmt);
-    _vwarnx(fmt, ap);
-    va_end(ap);
-}
-
-/*
- * Compute the greatest common divisor of a and b.
- */
-static int gcd(int a, int b)
-{
-    int c;
-
-    c = a % b;
-    while (c != 0)
-    {
-        a = b;
-        b = c;
-        c = a % b;
-    }
-
-    return (b);
-}
-
-/*
- * Exchange the block from nonopt_start to nonopt_end with the block
- * from nonopt_end to opt_end (keeping the same order of arguments
- * in each block).
- */
-static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
-{
-    int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
-    char* swap;
-
-    /*
-     * compute lengths of blocks and number and size of cycles
-     */
-    nnonopts = panonopt_end - panonopt_start;
-    nopts = opt_end - panonopt_end;
-    ncycle = gcd(nnonopts, nopts);
-    cyclelen = (opt_end - panonopt_start) / ncycle;
-
-    for (i = 0; i < ncycle; i++)
-    {
-        cstart = panonopt_end + i;
-        pos = cstart;
-        for (j = 0; j < cyclelen; j++)
-        {
-            if (pos >= panonopt_end)
-                pos -= nnonopts;
-            else
-                pos += nopts;
-            swap = nargv[pos];
-            /* LINTED const cast */
-            ((char**) nargv)[pos] = nargv[cstart];
-            /* LINTED const cast */
-            ((char**) nargv)[cstart] = swap;
-        }
-    }
-}
-
-/*
- * parse_long_options --
- *	Parse long options in argc/argv argument vector.
- * Returns -1 if short_too is set and the option does not match long_options.
- */
-static int parse_long_options(
-    char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too)
-{
-    char *current_argv, *has_equal;
-    size_t current_argv_len;
-    int i, ambiguous, match;
-
-#define IDENTICAL_INTERPRETATION(_x, _y)                                                                               \
-    (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag    \
-        && long_options[(_x)].val == long_options[(_y)].val)
-
-    current_argv = place;
-    match = -1;
-    ambiguous = 0;
-
-    optind++;
-
-    if ((has_equal = strchr(current_argv, '=')) != NULL)
-    {
-        /* argument found (--option=arg) */
-        current_argv_len = has_equal - current_argv;
-        has_equal++;
-    }
-    else
-        current_argv_len = strlen(current_argv);
-
-    for (i = 0; long_options[i].name; i++)
-    {
-        /* find matching long option */
-        if (strncmp(current_argv, long_options[i].name, current_argv_len))
-            continue;
-
-        if (strlen(long_options[i].name) == current_argv_len)
-        {
-            /* exact match */
-            match = i;
-            ambiguous = 0;
-            break;
-        }
-        /*
-         * If this is a known short option, don't allow
-         * a partial match of a single character.
-         */
-        if (short_too && current_argv_len == 1)
-            continue;
-
-        if (match == -1) /* partial match */
-            match = i;
-        else if (!IDENTICAL_INTERPRETATION(i, match))
-            ambiguous = 1;
-    }
-    if (ambiguous)
-    {
-        /* ambiguous abbreviation */
-        if (PRINT_ERROR)
-            warnx(ambig, (int) current_argv_len, current_argv);
-        optopt = 0;
-        return (BADCH);
-    }
-    if (match != -1)
-    { /* option found */
-        if (long_options[match].has_arg == no_argument && has_equal)
-        {
-            if (PRINT_ERROR)
-                warnx(noarg, (int) current_argv_len, current_argv);
-            /*
-             * XXX: GNU sets optopt to val regardless of flag
-             */
-            if (long_options[match].flag == NULL)
-                optopt = long_options[match].val;
-            else
-                optopt = 0;
-            return (BADARG);
-        }
-        if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
-        {
-            if (has_equal)
-                optarg = has_equal;
-            else if (long_options[match].has_arg == required_argument)
-            {
-                /*
-                 * optional argument doesn't use next nargv
-                 */
-                optarg = nargv[optind++];
-            }
-        }
-        if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
-        {
-            /*
-             * Missing argument; leading ':' indicates no error
-             * should be generated.
-             */
-            if (PRINT_ERROR)
-                warnx(recargstring, current_argv);
-            /*
-             * XXX: GNU sets optopt to val regardless of flag
-             */
-            if (long_options[match].flag == NULL)
-                optopt = long_options[match].val;
-            else
-                optopt = 0;
-            --optind;
-            return (BADARG);
-        }
-    }
-    else
-    { /* unknown option */
-        if (short_too)
-        {
-            --optind;
-            return (-1);
-        }
-        if (PRINT_ERROR)
-            warnx(illoptstring, current_argv);
-        optopt = 0;
-        return (BADCH);
-    }
-    if (idx)
-        *idx = match;
-    if (long_options[match].flag)
-    {
-        *long_options[match].flag = long_options[match].val;
-        return (0);
-    }
-    else
-        return (long_options[match].val);
-#undef IDENTICAL_INTERPRETATION
-}
-
-/*
- * getopt_internal --
- *	Parse argc/argv argument vector.  Called by user level routines.
- */
-static int getopt_internal(
-    int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags)
-{
-    const char* oli; /* option letter list index */
-    int optchar, short_too;
-    static int posixly_correct = -1;
-
-    if (options == NULL)
-        return (-1);
-
-    /*
-     * XXX Some GNU programs (like cvs) set optind to 0 instead of
-     * XXX using optreset.  Work around this braindamage.
-     */
-    if (optind == 0)
-        optind = optreset = 1;
-
-    /*
-     * Disable GNU extensions if POSIXLY_CORRECT is set or options
-     * string begins with a '+'.
-     *
-     * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
-     *                 optreset != 0 for GNU compatibility.
-     */
-    if (posixly_correct == -1 || optreset != 0)
-        posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
-    if (*options == '-')
-        flags |= FLAG_ALLARGS;
-    else if (posixly_correct || *options == '+')
-        flags &= ~FLAG_PERMUTE;
-    if (*options == '+' || *options == '-')
-        options++;
-
-    optarg = NULL;
-    if (optreset)
-        nonopt_start = nonopt_end = -1;
-start:
-    if (optreset || !*place)
-    { /* update scanning pointer */
-        optreset = 0;
-        if (optind >= nargc)
-        { /* end of argument vector */
-            place = EMSG;
-            if (nonopt_end != -1)
-            {
-                /* do permutation, if we have to */
-                permute_args(nonopt_start, nonopt_end, optind, nargv);
-                optind -= nonopt_end - nonopt_start;
-            }
-            else if (nonopt_start != -1)
-            {
-                /*
-                 * If we skipped non-options, set optind
-                 * to the first of them.
-                 */
-                optind = nonopt_start;
-            }
-            nonopt_start = nonopt_end = -1;
-            return (-1);
-        }
-        if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
-        {
-            place = EMSG; /* found non-option */
-            if (flags & FLAG_ALLARGS)
-            {
-                /*
-                 * GNU extension:
-                 * return non-option as argument to option 1
-                 */
-                optarg = nargv[optind++];
-                return (INORDER);
-            }
-            if (!(flags & FLAG_PERMUTE))
-            {
-                /*
-                 * If no permutation wanted, stop parsing
-                 * at first non-option.
-                 */
-                return (-1);
-            }
-            /* do permutation */
-            if (nonopt_start == -1)
-                nonopt_start = optind;
-            else if (nonopt_end != -1)
-            {
-                permute_args(nonopt_start, nonopt_end, optind, nargv);
-                nonopt_start = optind - (nonopt_end - nonopt_start);
-                nonopt_end = -1;
-            }
-            optind++;
-            /* process next argument */
-            goto start;
-        }
-        if (nonopt_start != -1 && nonopt_end == -1)
-            nonopt_end = optind;
-
-        /*
-         * If we have "-" do nothing, if "--" we are done.
-         */
-        if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
-        {
-            optind++;
-            place = EMSG;
-            /*
-             * We found an option (--), so if we skipped
-             * non-options, we have to permute.
-             */
-            if (nonopt_end != -1)
-            {
-                permute_args(nonopt_start, nonopt_end, optind, nargv);
-                optind -= nonopt_end - nonopt_start;
-            }
-            nonopt_start = nonopt_end = -1;
-            return (-1);
-        }
-    }
-
-    /*
-     * Check long options if:
-     *  1) we were passed some
-     *  2) the arg is not just "-"
-     *  3) either the arg starts with -- we are getopt_long_only()
-     */
-    if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
-    {
-        short_too = 0;
-        if (*place == '-')
-            place++; /* --foo long option */
-        else if (*place != ':' && strchr(options, *place) != NULL)
-            short_too = 1; /* could be short option too */
-
-        optchar = parse_long_options(nargv, options, long_options, idx, short_too);
-        if (optchar != -1)
-        {
-            place = EMSG;
-            return (optchar);
-        }
-    }
-
-    if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
-        || (oli = strchr(options, optchar)) == NULL)
-    {
-        /*
-         * If the user specified "-" and  '-' isn't listed in
-         * options, return -1 (non-option) as per POSIX.
-         * Otherwise, it is an unknown option character (or ':').
-         */
-        if (optchar == (int) '-' && *place == '\0')
-            return (-1);
-        if (!*place)
-            ++optind;
-        if (PRINT_ERROR)
-            warnx(illoptchar, optchar);
-        optopt = optchar;
-        return (BADCH);
-    }
-    if (long_options != NULL && optchar == 'W' && oli[1] == ';')
-    {
-        /* -W long-option */
-        if (*place) /* no space */
-            /* NOTHING */;
-        else if (++optind >= nargc)
-        { /* no arg */
-            place = EMSG;
-            if (PRINT_ERROR)
-                warnx(recargchar, optchar);
-            optopt = optchar;
-            return (BADARG);
-        }
-        else /* white space */
-            place = nargv[optind];
-        optchar = parse_long_options(nargv, options, long_options, idx, 0);
-        place = EMSG;
-        return (optchar);
-    }
-    if (*++oli != ':')
-    { /* doesn't take argument */
-        if (!*place)
-            ++optind;
-    }
-    else
-    { /* takes (optional) argument */
-        optarg = NULL;
-        if (*place) /* no white space */
-            optarg = place;
-        else if (oli[1] != ':')
-        { /* arg not optional */
-            if (++optind >= nargc)
-            { /* no arg */
-                place = EMSG;
-                if (PRINT_ERROR)
-                    warnx(recargchar, optchar);
-                optopt = optchar;
-                return (BADARG);
-            }
-            else
-                optarg = nargv[optind];
-        }
-        place = EMSG;
-        ++optind;
-    }
-    /* dump back option letter */
-    return (optchar);
-}
-
-#ifdef REPLACE_GETOPT
-/*
- * getopt --
- *	Parse argc/argv argument vector.
- *
- * [eventually this will replace the BSD getopt]
- */
-int getopt(int nargc, char* const* nargv, const char* options)
-{
-
-    /*
-     * We don't pass FLAG_PERMUTE to getopt_internal() since
-     * the BSD getopt(3) (unlike GNU) has never done this.
-     *
-     * Furthermore, since many privileged programs call getopt()
-     * before dropping privileges it makes sense to keep things
-     * as simple (and bug-free) as possible.
-     */
-    return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
-}
-#endif /* REPLACE_GETOPT */
-
-/*
- * getopt_long --
- *	Parse argc/argv argument vector.
- */
-int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
-{
-
-    return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
-}
-
-/*
- * getopt_long_only --
- *	Parse argc/argv argument vector.
- */
-int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
-{
-
-    return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
-}
diff --git a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h b/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h
deleted file mode 100644
index baa1d61b5..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GETOPT_H__
-/**
- * DISCLAIMER
- * This file has no copyright assigned and is placed in the Public Domain.
- * This file is a part of the w64 mingw-runtime package.
- *
- * The w64 mingw-runtime package and its code is distributed in the hope that it
- * will be useful but WITHOUT ANY WARRANTY.  ALL WARRANTIES, EXPRESSED OR
- * IMPLIED ARE HEREBY DISCLAIMED.  This includes but is not limited to
- * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-
-#define __GETOPT_H__
-
-/* All the headers include this file. */
-#include <crtdefs.h>
-
-#if defined(WINGETOPT_SHARED_LIB)
-#if defined(BUILDING_WINGETOPT_DLL)
-#define WINGETOPT_API __declspec(dllexport)
-#else
-#define WINGETOPT_API __declspec(dllimport)
-#endif
-#else
-#define WINGETOPT_API
-#endif
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-    WINGETOPT_API extern int optind; /* index of first non-option in argv      */
-    WINGETOPT_API extern int optopt; /* single option character, as parsed     */
-    WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
-    /* (user may set to zero, to suppress)    */
-
-    WINGETOPT_API extern char* optarg; /* pointer to argument of current option  */
-
-    extern int getopt(int nargc, char* const* nargv, const char* options);
-
-#ifdef _BSD_SOURCE
-/*
- * BSD adds the non-standard `optreset' feature, for reinitialisation
- * of `getopt' parsing.  We support this feature, for applications which
- * proclaim their BSD heritage, before including this header; however,
- * to maintain portability, developers are advised to avoid it.
- */
-#define optreset __mingw_optreset
-    extern int optreset;
-#endif
-#ifdef __cplusplus
-}
-#endif
-/*
- * POSIX requires the `getopt' API to be specified in `unistd.h';
- * thus, `unistd.h' includes this header.  However, we do not want
- * to expose the `getopt_long' or `getopt_long_only' APIs, when
- * included in this manner.  Thus, close the standard __GETOPT_H__
- * declarations block, and open an additional __GETOPT_LONG_H__
- * specific block, only when *not* __UNISTD_H_SOURCED__, in which
- * to declare the extended API.
- */
-#endif /* !defined(__GETOPT_H__) */
-
-#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
-#define __GETOPT_LONG_H__
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-    struct option /* specification for a long form option...	*/
-    {
-        const char* name; /* option name, without leading hyphens */
-        int has_arg;      /* does it take an argument?		*/
-        int* flag;        /* where to save its status, or NULL	*/
-        int val;          /* its associated status value		*/
-    };
-
-    enum /* permitted values for its `has_arg' field...	*/
-    {
-        no_argument = 0,   /* option never takes an argument	*/
-        required_argument, /* option always requires an argument	*/
-        optional_argument  /* option may take an argument		*/
-    };
-
-    extern int getopt_long(
-        int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
-    extern int getopt_long_only(
-        int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
-/*
- * Previous MinGW implementation had...
- */
-#ifndef HAVE_DECL_GETOPT
-/*
- * ...for the long form API only; keep this for compatibility.
- */
-#define HAVE_DECL_GETOPT 1
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
diff --git a/csrcs/fastdeploy/backends/tensorrt/trt_backend.cc b/csrcs/fastdeploy/backends/tensorrt/trt_backend.cc
deleted file mode 100644
index dd3f837d9..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/trt_backend.cc
+++ /dev/null
@@ -1,528 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/backends/tensorrt/trt_backend.h"
-#include "fastdeploy/utils/utils.h"
-#ifdef ENABLE_PADDLE_FRONTEND
-#include "paddle2onnx/converter.h"
-#endif
-
-namespace fastdeploy {
-size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
-  if (dtype == nvinfer1::DataType::kFLOAT) {
-    return sizeof(float);
-  } else if (dtype == nvinfer1::DataType::kHALF) {
-    return sizeof(float) / 2;
-  } else if (dtype == nvinfer1::DataType::kINT8) {
-    return sizeof(int8_t);
-  } else if (dtype == nvinfer1::DataType::kINT32) {
-    return sizeof(int32_t);
-  }
-  // kBOOL
-  return sizeof(bool);
-}
-
-FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
-  if (dtype == nvinfer1::DataType::kFLOAT) {
-    return FDDataType::FP32;
-  } else if (dtype == nvinfer1::DataType::kHALF) {
-    return FDDataType::FP16;
-  } else if (dtype == nvinfer1::DataType::kINT8) {
-    return FDDataType::INT8;
-  } else if (dtype == nvinfer1::DataType::kINT32) {
-    return FDDataType::INT32;
-  }
-  // kBOOL
-  return FDDataType::BOOL;
-}
-
-std::vector<int> toVec(const nvinfer1::Dims& dim) {
-  std::vector<int> out(dim.d, dim.d + dim.nbDims);
-  return out;
-}
-
-bool CheckDynamicShapeConfig(const paddle2onnx::OnnxReader& reader,
-                             const TrtBackendOption& option) {
-  // paddle2onnx::ModelTensorInfo inputs[reader.NumInputs()];
-  // std::string input_shapes[reader.NumInputs()];
-  std::vector<paddle2onnx::ModelTensorInfo> inputs(reader.NumInputs());
-  std::vector<std::string> input_shapes(reader.NumInputs());
-  for (int i = 0; i < reader.NumInputs(); ++i) {
-    reader.GetInputInfo(i, &inputs[i]);
-
-    // change 0 to -1, when input_dim is a string, onnx will make it to zero
-    for (int j = 0; j < inputs[i].rank; ++j) {
-      if (inputs[i].shape[j] <= 0) {
-        inputs[i].shape[j] = -1;
-      }
-    }
-
-    input_shapes[i] = "";
-    for (int j = 0; j < inputs[i].rank; ++j) {
-      if (j != inputs[i].rank - 1) {
-        input_shapes[i] += (std::to_string(inputs[i].shape[j]) + ", ");
-      } else {
-        input_shapes[i] += std::to_string(inputs[i].shape[j]);
-      }
-    }
-  }
-
-  bool all_check_passed = true;
-  for (int i = 0; i < reader.NumInputs(); ++i) {
-    bool contain_unknown_dim = false;
-    for (int j = 0; j < inputs[i].rank; ++j) {
-      if (inputs[i].shape[j] < 0) {
-        contain_unknown_dim = true;
-      }
-    }
-
-    std::string name(inputs[i].name, strlen(inputs[i].name));
-    FDINFO << "The loaded model's input tensor:" << name
-           << " has shape [" + input_shapes[i] << "]." << std::endl;
-    if (contain_unknown_dim) {
-      auto iter1 = option.min_shape.find(name);
-      auto iter2 = option.max_shape.find(name);
-      auto iter3 = option.opt_shape.find(name);
-      if (iter1 == option.min_shape.end() || iter2 == option.max_shape.end() ||
-          iter3 == option.opt_shape.end()) {
-        FDERROR << "The loaded model's input tensor:" << name
-                << " has dynamic shape [" + input_shapes[i] +
-                       "], but didn't configure it's shape for tensorrt with "
-                       "SetTrtInputShape correctly."
-                << std::endl;
-        all_check_passed = false;
-      }
-    }
-  }
-
-  return all_check_passed;
-}
-
-bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
-                             const TrtBackendOption& option) {
-  if (initialized_) {
-    FDERROR << "TrtBackend is already initlized, cannot initialize again."
-            << std::endl;
-    return false;
-  }
-  cudaSetDevice(option.gpu_id);
-
-  std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in);
-  if (!fin) {
-    FDERROR << "Failed to open TensorRT Engine file " << trt_engine_file
-            << std::endl;
-    return false;
-  }
-  fin.seekg(0, std::ios::end);
-  std::string engine_buffer;
-  engine_buffer.resize(fin.tellg());
-  fin.seekg(0, std::ios::beg);
-  fin.read(&(engine_buffer.at(0)), engine_buffer.size());
-  fin.close();
-  SampleUniquePtr<IRuntime> runtime{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
-  if (!runtime) {
-    FDERROR << "Failed to call createInferRuntime()." << std::endl;
-    return false;
-  }
-  engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
-      runtime->deserializeCudaEngine(engine_buffer.data(),
-                                     engine_buffer.size()),
-      samplesCommon::InferDeleter());
-  if (!engine_) {
-    FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
-    return false;
-  }
-
-  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
-      engine_->createExecutionContext());
-  FDASSERT(cudaStreamCreate(&stream_) == 0,
-           "[ERROR] Error occurs while calling cudaStreamCreate().");
-  GetInputOutputInfo();
-  initialized_ = true;
-  return true;
-}
-
-bool TrtBackend::InitFromPaddle(const std::string& model_file,
-                                const std::string& params_file,
-                                const TrtBackendOption& option, bool verbose) {
-  if (initialized_) {
-    FDERROR << "TrtBackend is already initlized, cannot initialize again."
-            << std::endl;
-    return false;
-  }
-
-#ifdef ENABLE_PADDLE_FRONTEND
-  std::vector<paddle2onnx::CustomOp> custom_ops;
-  for (auto& item : option.custom_op_info_) {
-    paddle2onnx::CustomOp op;
-    std::strcpy(op.op_name, item.first.c_str());
-    std::strcpy(op.export_op_name, item.second.c_str());
-    custom_ops.emplace_back(op);
-  }
-  char* model_content_ptr;
-  int model_content_size = 0;
-  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
-                           &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, custom_ops.data(),
-                           custom_ops.size())) {
-    FDERROR << "Error occured while export PaddlePaddle to ONNX format."
-            << std::endl;
-    return false;
-  }
-
-  if (option.remove_multiclass_nms_) {
-    char* new_model = nullptr;
-    int new_model_size = 0;
-    if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
-                                          &new_model, &new_model_size)) {
-      FDERROR << "Try to remove MultiClassNMS failed." << std::endl;
-      return false;
-    }
-    delete[] model_content_ptr;
-    std::string onnx_model_proto(new_model, new_model + new_model_size);
-    delete[] new_model;
-    return InitFromOnnx(onnx_model_proto, option, true);
-  }
-
-  std::string onnx_model_proto(model_content_ptr,
-                               model_content_ptr + model_content_size);
-  delete[] model_content_ptr;
-  model_content_ptr = nullptr;
-  return InitFromOnnx(onnx_model_proto, option, true);
-#else
-  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
-             "call `InitFromOnnx` instead."
-          << std::endl;
-  return false;
-#endif
-}
-
-bool TrtBackend::InitFromOnnx(const std::string& model_file,
-                              const TrtBackendOption& option,
-                              bool from_memory_buffer) {
-  if (initialized_) {
-    FDERROR << "TrtBackend is already initlized, cannot initialize again."
-            << std::endl;
-    return false;
-  }
-  cudaSetDevice(option.gpu_id);
-
-  std::string onnx_content = "";
-  if (!from_memory_buffer) {
-    std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
-    if (!fin) {
-      FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
-              << std::endl;
-      return false;
-    }
-    fin.seekg(0, std::ios::end);
-    onnx_content.resize(fin.tellg());
-    fin.seekg(0, std::ios::beg);
-    fin.read(&(onnx_content.at(0)), onnx_content.size());
-    fin.close();
-  } else {
-    onnx_content = model_file;
-  }
-
-  // This part of code will record the original outputs order
-  // because the converted tensorrt network may exist wrong order of outputs
-  outputs_order_.clear();
-  auto onnx_reader =
-      paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
-  for (int i = 0; i < onnx_reader.NumOutputs(); ++i) {
-    std::string name(
-        onnx_reader.output_names[i],
-        onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i]));
-    outputs_order_[name] = i;
-  }
-  if (!CheckDynamicShapeConfig(onnx_reader, option)) {
-    FDERROR << "TrtBackend::CheckDynamicShapeConfig failed." << std::endl;
-    return false;
-  }
-
-  if (option.serialize_file != "") {
-    std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
-    if (fin) {
-      FDINFO << "Detect serialized TensorRT Engine file in "
-             << option.serialize_file << ", will load it directly."
-             << std::endl;
-      fin.close();
-      return InitFromTrt(option.serialize_file);
-    }
-  }
-
-  if (!CreateTrtEngine(onnx_content, option)) {
-    return false;
-  }
-
-  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
-      engine_->createExecutionContext());
-  FDASSERT(cudaStreamCreate(&stream_) == 0,
-           "[ERROR] Error occurs while calling cudaStreamCreate().");
-  GetInputOutputInfo();
-  initialized_ = true;
-  return true;
-}
-
-bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
-                       std::vector<FDTensor>* outputs) {
-  AllocateBufferInDynamicShape(inputs, outputs);
-  std::vector<void*> input_binds(inputs.size());
-  for (size_t i = 0; i < inputs.size(); ++i) {
-    if (inputs[i].dtype == FDDataType::INT64) {
-      int64_t* data = static_cast<int64_t*>(inputs[i].Data());
-      std::vector<int32_t> casted_data(data, data + inputs[i].Numel());
-      FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
-                               static_cast<void*>(casted_data.data()),
-                               inputs[i].Nbytes() / 2, cudaMemcpyHostToDevice,
-                               stream_) == 0,
-               "[ERROR] Error occurs while copy memory from CPU to GPU.");
-    } else {
-      FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
-                               inputs[i].Data(), inputs[i].Nbytes(),
-                               cudaMemcpyHostToDevice, stream_) == 0,
-               "[ERROR] Error occurs while copy memory from CPU to GPU.");
-    }
-  }
-  if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
-    FDERROR << "Failed to Infer with TensorRT." << std::endl;
-    return false;
-  }
-  for (size_t i = 0; i < outputs->size(); ++i) {
-    FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(),
-                             outputs_buffer_[(*outputs)[i].name].data(),
-                             (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
-                             stream_) == 0,
-             "[ERROR] Error occurs while copy memory from GPU to CPU.");
-  }
-  return true;
-}
-
-void TrtBackend::GetInputOutputInfo() {
-  inputs_desc_.clear();
-  outputs_desc_.clear();
-  auto num_binds = engine_->getNbBindings();
-  for (auto i = 0; i < num_binds; ++i) {
-    std::string name = std::string(engine_->getBindingName(i));
-    auto shape = toVec(engine_->getBindingDimensions(i));
-    auto dtype = engine_->getBindingDataType(i);
-    if (engine_->bindingIsInput(i)) {
-      inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
-      inputs_buffer_[name] = DeviceBuffer(dtype);
-    } else {
-      outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
-      outputs_buffer_[name] = DeviceBuffer(dtype);
-    }
-  }
-  bindings_.resize(num_binds);
-}
-
-void TrtBackend::AllocateBufferInDynamicShape(
-    const std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs) {
-  for (const auto& item : inputs) {
-    auto idx = engine_->getBindingIndex(item.name.c_str());
-    std::vector<int> shape(item.shape.begin(), item.shape.end());
-    auto dims = sample::toDims(shape);
-    context_->setBindingDimensions(idx, dims);
-    if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
-      inputs_buffer_[item.name].resize(dims);
-      bindings_[idx] = inputs_buffer_[item.name].data();
-    }
-  }
-  if (outputs->size() != outputs_desc_.size()) {
-    outputs->resize(outputs_desc_.size());
-  }
-  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
-    auto output_dims = context_->getBindingDimensions(idx);
-
-    // find the original index of output
-    auto iter = outputs_order_.find(outputs_desc_[i].name);
-    FDASSERT(iter != outputs_order_.end(),
-             "Cannot find output:" + outputs_desc_[i].name +
-                 " of tensorrt network from the original model.");
-    auto ori_idx = iter->second;
-    (*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
-    (*outputs)[ori_idx].shape.assign(output_dims.d,
-                                     output_dims.d + output_dims.nbDims);
-    (*outputs)[ori_idx].name = outputs_desc_[i].name;
-    (*outputs)[ori_idx].data.resize(volume(output_dims) *
-                                    TrtDataTypeSize(outputs_desc_[i].dtype));
-    if ((*outputs)[ori_idx].Nbytes() >
-        outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
-      outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
-      bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
-    }
-  }
-}
-
-bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
-                                 const TrtBackendOption& option) {
-  const auto explicitBatch =
-      1U << static_cast<uint32_t>(
-          nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
-
-  builder_ = SampleUniquePtr<nvinfer1::IBuilder>(
-      nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
-  if (!builder_) {
-    FDERROR << "Failed to call createInferBuilder()." << std::endl;
-    return false;
-  }
-  network_ = SampleUniquePtr<nvinfer1::INetworkDefinition>(
-      builder_->createNetworkV2(explicitBatch));
-  if (!network_) {
-    FDERROR << "Failed to call createNetworkV2()." << std::endl;
-    return false;
-  }
-  auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(
-      builder_->createBuilderConfig());
-  if (!config) {
-    FDERROR << "Failed to call createBuilderConfig()." << std::endl;
-    return false;
-  }
-
-  if (option.enable_fp16) {
-    if (!builder_->platformHasFastFp16()) {
-      FDWARNING << "Detected FP16 is not supported in the current GPU, "
-                   "will use FP32 instead."
-                << std::endl;
-    } else {
-      config->setFlag(nvinfer1::BuilderFlag::kFP16);
-    }
-  }
-
-  parser_ = SampleUniquePtr<nvonnxparser::IParser>(
-      nvonnxparser::createParser(*network_, sample::gLogger.getTRTLogger()));
-  if (!parser_) {
-    FDERROR << "Failed to call createParser()." << std::endl;
-    return false;
-  }
-  if (!parser_->parse(onnx_model.data(), onnx_model.size())) {
-    FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
-    return false;
-  }
-
-  FDINFO << "Start to building TensorRT Engine..." << std::endl;
-  bool fp16 = builder_->platformHasFastFp16();
-  builder_->setMaxBatchSize(option.max_batch_size);
-
-  config->setMaxWorkspaceSize(option.max_workspace_size);
-
-  if (option.max_shape.size() > 0) {
-    auto profile = builder_->createOptimizationProfile();
-    FDASSERT(option.max_shape.size() == option.min_shape.size() &&
-                 option.min_shape.size() == option.opt_shape.size(),
-             "[TrtBackend] Size of max_shape/opt_shape/min_shape in "
-             "TrtBackendOption should keep same.");
-    for (const auto& item : option.min_shape) {
-      // set min shape
-      FDASSERT(profile->setDimensions(item.first.c_str(),
-                                      nvinfer1::OptProfileSelector::kMIN,
-                                      sample::toDims(item.second)),
-               "[TrtBackend] Failed to set min_shape for input: " + item.first +
-                   " in TrtBackend.");
-
-      // set optimization shape
-      auto iter = option.opt_shape.find(item.first);
-      FDASSERT(iter != option.opt_shape.end(),
-               "[TrtBackend] Cannot find input name: " + item.first +
-                   " in TrtBackendOption::opt_shape.");
-      FDASSERT(profile->setDimensions(item.first.c_str(),
-                                      nvinfer1::OptProfileSelector::kOPT,
-                                      sample::toDims(iter->second)),
-               "[TrtBackend] Failed to set opt_shape for input: " + item.first +
-                   " in TrtBackend.");
-      // set max shape
-      iter = option.max_shape.find(item.first);
-      FDASSERT(iter != option.max_shape.end(),
-               "[TrtBackend] Cannot find input name: " + item.first +
-                   " in TrtBackendOption::max_shape.");
-      FDASSERT(profile->setDimensions(item.first.c_str(),
-                                      nvinfer1::OptProfileSelector::kMAX,
-                                      sample::toDims(iter->second)),
-               "[TrtBackend] Failed to set max_shape for input: " + item.first +
-                   " in TrtBackend.");
-    }
-    config->addOptimizationProfile(profile);
-  }
-
-  SampleUniquePtr<IHostMemory> plan{
-      builder_->buildSerializedNetwork(*network_, *config)};
-  if (!plan) {
-    FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
-    return false;
-  }
-
-  SampleUniquePtr<IRuntime> runtime{
-      createInferRuntime(sample::gLogger.getTRTLogger())};
-  if (!runtime) {
-    FDERROR << "Failed to call createInferRuntime()." << std::endl;
-    return false;
-  }
-
-  engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
-      runtime->deserializeCudaEngine(plan->data(), plan->size()),
-      samplesCommon::InferDeleter());
-  if (!engine_) {
-    FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
-    return false;
-  }
-
-  FDINFO << "TensorRT Engine is built succussfully." << std::endl;
-  if (option.serialize_file != "") {
-    FDINFO << "Serialize TensorRTEngine to local file " << option.serialize_file
-           << "." << std::endl;
-    std::ofstream engine_file(option.serialize_file.c_str());
-    if (!engine_file) {
-      FDERROR << "Failed to open " << option.serialize_file << " to write."
-              << std::endl;
-      return false;
-    }
-    engine_file.write(static_cast<char*>(plan->data()), plan->size());
-    engine_file.close();
-    FDINFO << "TensorRTEngine is serialized to local file "
-           << option.serialize_file
-           << ", we can load this model from the seralized engine "
-              "directly next time."
-           << std::endl;
-  }
-  return true;
-}
-
-TensorInfo TrtBackend::GetInputInfo(int index) {
-  FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
-                                    " should less than the number of inputs:" +
-                                    std::to_string(NumInputs()) + ".");
-  TensorInfo info;
-  info.name = inputs_desc_[index].name;
-  info.shape.assign(inputs_desc_[index].shape.begin(),
-                    inputs_desc_[index].shape.end());
-  info.dtype = GetFDDataType(inputs_desc_[index].dtype);
-  return info;
-}
-
-TensorInfo TrtBackend::GetOutputInfo(int index) {
-  FDASSERT(index < NumOutputs(),
-           "The index:" + std::to_string(index) +
-               " should less than the number of outputs:" +
-               std::to_string(NumOutputs()) + ".");
-  TensorInfo info;
-  info.name = outputs_desc_[index].name;
-  info.shape.assign(outputs_desc_[index].shape.begin(),
-                    outputs_desc_[index].shape.end());
-  info.dtype = GetFDDataType(outputs_desc_[index].dtype);
-  return info;
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/backends/tensorrt/trt_backend.h b/csrcs/fastdeploy/backends/tensorrt/trt_backend.h
deleted file mode 100644
index 376da241f..000000000
--- a/csrcs/fastdeploy/backends/tensorrt/trt_backend.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <iostream>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "fastdeploy/backends/backend.h"
-
-#include "fastdeploy/backends/tensorrt/common/argsParser.h"
-#include "fastdeploy/backends/tensorrt/common/buffers.h"
-#include "fastdeploy/backends/tensorrt/common/common.h"
-#include "fastdeploy/backends/tensorrt/common/logger.h"
-#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
-#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
-
-#include <cuda_runtime_api.h>
-#include "NvInfer.h"
-
-namespace fastdeploy {
-using namespace samplesCommon;
-
-struct TrtValueInfo {
-  std::string name;
-  std::vector<int> shape;
-  nvinfer1::DataType dtype;
-};
-
-struct TrtBackendOption {
-  int gpu_id = 0;
-  bool enable_fp16 = false;
-  bool enable_int8 = false;
-  size_t max_batch_size = 32;
-  size_t max_workspace_size = 1 << 30;
-  std::map<std::string, std::vector<int32_t>> max_shape;
-  std::map<std::string, std::vector<int32_t>> min_shape;
-  std::map<std::string, std::vector<int32_t>> opt_shape;
-  std::string serialize_file = "";
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
-};
-
-std::vector<int> toVec(const nvinfer1::Dims& dim);
-size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
-FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
-
-class TrtBackend : public BaseBackend {
- public:
-  TrtBackend() : engine_(nullptr), context_(nullptr) {}
-  virtual ~TrtBackend() = default;
-  void BuildOption(const TrtBackendOption& option);
-
-  bool InitFromPaddle(const std::string& model_file,
-                      const std::string& params_file,
-                      const TrtBackendOption& option = TrtBackendOption(),
-                      bool verbose = false);
-  bool InitFromOnnx(const std::string& model_file,
-                    const TrtBackendOption& option = TrtBackendOption(),
-                    bool from_memory_buffer = false);
-  bool InitFromTrt(const std::string& trt_engine_file,
-                   const TrtBackendOption& option = TrtBackendOption());
-
-  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
-
-  int NumInputs() const { return inputs_desc_.size(); }
-  int NumOutputs() const { return outputs_desc_.size(); }
-  TensorInfo GetInputInfo(int index);
-  TensorInfo GetOutputInfo(int index);
-
- private:
-  std::shared_ptr<nvinfer1::ICudaEngine> engine_;
-  std::shared_ptr<nvinfer1::IExecutionContext> context_;
-  SampleUniquePtr<nvonnxparser::IParser> parser_;
-  SampleUniquePtr<nvinfer1::IBuilder> builder_;
-  SampleUniquePtr<nvinfer1::INetworkDefinition> network_;
-  cudaStream_t stream_{};
-  std::vector<void*> bindings_;
-  std::vector<TrtValueInfo> inputs_desc_;
-  std::vector<TrtValueInfo> outputs_desc_;
-  std::map<std::string, DeviceBuffer> inputs_buffer_;
-  std::map<std::string, DeviceBuffer> outputs_buffer_;
-
-  // Sometimes while the number of outputs > 1
-  // the output order of tensorrt may not be same
-  // with the original onnx model
-  // So this parameter will record to origin outputs
-  // order, to help recover the rigt order
-  std::map<std::string, int> outputs_order_;
-
-  void GetInputOutputInfo();
-  void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
-                                    std::vector<FDTensor>* outputs);
-  bool CreateTrtEngine(const std::string& onnx_model,
-                       const TrtBackendOption& option);
-};
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/core/config.h.in b/csrcs/fastdeploy/core/config.h.in
deleted file mode 100644
index 771392586..000000000
--- a/csrcs/fastdeploy/core/config.h.in
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-
-#ifndef FASTDEPLOY_DEBUG
-#cmakedefine FASTDEPLOY_DEBUG
-#endif
-
-#ifndef FASTDEPLOY_LIB
-#cmakedefine FASTDEPLOY_LIB
-#endif
-
-#ifndef ENABLE_PADDLE_FRONTEND
-#cmakedefine ENABLE_PADDLE_FRONTEND
-#endif
-
-#ifndef ENABLE_ORT_BACKEND
-#cmakedefine ENABLE_ORT_BACKEND
-#endif
-
-#ifndef ENABLE_PADDLE_BACKEND
-#cmakedefine ENABLE_PADDLE_BACKEND
-#endif
-
-#ifndef WITH_GPU
-#cmakedefine WITH_GPU
-#endif
-
-#ifndef ENABLE_TRT_BACKEND
-#cmakedefine ENABLE_TRT_BACKEND
-#endif
-
-#ifndef ENABLE_VISION
-#cmakedefine ENABLE_VISION
-#endif
-
-#ifndef ENABLE_OPENCV_CUDA
-#cmakedefine ENABLE_OPENCV_CUDA
-#endif
-
-#ifndef ENABLE_VISION_VISUALIZE
-#cmakedefine ENABLE_VISION_VISUALIZE
-#endif
diff --git a/csrcs/fastdeploy/core/fd_tensor.cc b/csrcs/fastdeploy/core/fd_tensor.cc
deleted file mode 100644
index c6f7a4739..000000000
--- a/csrcs/fastdeploy/core/fd_tensor.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/core/fd_tensor.h"
-#include "fastdeploy/utils/utils.h"
-
-#ifdef WITH_GPU
-#include <cuda_runtime_api.h>
-#endif
-
-namespace fastdeploy {
-
-void* FDTensor::MutableData() {
-  if (external_data_ptr != nullptr) {
-    return external_data_ptr;
-  }
-  return data.data();
-}
-
-void* FDTensor::Data() {
-  if (external_data_ptr != nullptr) {
-    if (device == Device::GPU) {
-#ifdef WITH_GPU
-      // need to copy cuda mem to cpu first
-      temporary_cpu_buffer.resize(Nbytes());
-      FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
-                          Nbytes(), cudaMemcpyDeviceToHost) == 0,
-               "[ERROR] Error occurs while copy memory from GPU to CPU");
-      return temporary_cpu_buffer.data();
-#else
-      FDASSERT(false,
-               "The FastDeploy didn't compile under -DWITH_GPU=ON, so this is "
-               "an unexpected problem happend.");
-#endif
-    } else {
-      return external_data_ptr;
-    }
-  }
-  return data.data();
-}
-
-const void* FDTensor::Data() const {
-  if (external_data_ptr != nullptr) {
-    return external_data_ptr;
-  }
-  return data.data();
-}
-
-void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
-                               const FDDataType& data_type, void* data_buffer) {
-  dtype = data_type;
-  shape.assign(new_shape.begin(), new_shape.end());
-  external_data_ptr = data_buffer;
-}
-
-void FDTensor::Allocate(const std::vector<int64_t>& new_shape,
-                        const FDDataType& data_type,
-                        const std::string& tensor_name) {
-  dtype = data_type;
-  name = tensor_name;
-  shape.assign(new_shape.begin(), new_shape.end());
-  int unit = FDDataTypeSize(data_type);
-  int total_size =
-      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
-  data.resize(total_size * unit);
-}
-
-int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }
-
-int FDTensor::Numel() const {
-  return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
-}
-
-template <typename T>
-void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
-                         double* min) {
-  T* ptr = static_cast<T*>(src_ptr);
-  *mean = 0;
-  *max = -99999999;
-  *min = 99999999;
-  for (int i = 0; i < size; ++i) {
-    if (*(ptr + i) > *max) {
-      *max = *(ptr + i);
-    }
-    if (*(ptr + i) < *min) {
-      *min = *(ptr + i);
-    }
-    *mean += *(ptr + i);
-  }
-  *mean = *mean / size;
-}
-
-void FDTensor::PrintInfo(const std::string& prefix) {
-  double mean = 0;
-  double max = -99999999;
-  double min = 99999999;
-  if (dtype == FDDataType::FP32) {
-    CalculateStatisInfo<float>(Data(), Numel(), &mean, &max, &min);
-  } else if (dtype == FDDataType::FP64) {
-    CalculateStatisInfo<double>(Data(), Numel(), &mean, &max, &min);
-  } else if (dtype == FDDataType::INT8) {
-    CalculateStatisInfo<int8_t>(Data(), Numel(), &mean, &max, &min);
-  } else if (dtype == FDDataType::UINT8) {
-    CalculateStatisInfo<uint8_t>(Data(), Numel(), &mean, &max, &min);
-  } else if (dtype == FDDataType::INT32) {
-    CalculateStatisInfo<int32_t>(Data(), Numel(), &mean, &max, &min);
-  } else if (dtype == FDDataType::INT64) {
-    CalculateStatisInfo<int64_t>(Data(), Numel(), &mean, &max, &min);
-  } else {
-    FDASSERT(false,
-             "PrintInfo function doesn't support current situation, maybe you "
-             "need enhance this function now.")
-  }
-  std::cout << prefix << ": shape=";
-  for (int i = 0; i < shape.size(); ++i) {
-    std::cout << shape[i] << " ";
-  }
-  std::cout << ", dtype=" << Str(dtype) << ", mean=" << mean << ", max=" << max
-            << ", min=" << min << std::endl;
-}
-
-FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/core/fd_tensor.h b/csrcs/fastdeploy/core/fd_tensor.h
deleted file mode 100644
index 84e8c7ff0..000000000
--- a/csrcs/fastdeploy/core/fd_tensor.h
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-
-#include <iostream>
-#include <numeric>
-#include <string>
-#include <vector>
-
-#include "fastdeploy/core/fd_type.h"
-
-namespace fastdeploy {
-
-struct FASTDEPLOY_DECL FDTensor {
-  std::vector<int8_t> data;
-  std::vector<int64_t> shape;
-  std::string name = "";
-  FDDataType dtype;
-
-  // This use to skip memory copy step
-  // the external_data_ptr will point to the user allocated memory
-  // user has to maintain the memory, allocate and release
-  void* external_data_ptr = nullptr;
-  // The internal data will be on CPU
-  // Some times, the external data is on the GPU, and we are going to use
-  // GPU to inference the model
-  // so we can skip data transfer, which may improve the efficience
-  Device device = Device::CPU;
-
-  // if the external data is not on CPU, we use this temporary buffer
-  // to transfer data to CPU at some cases we need to visit the
-  // other devices' data
-  std::vector<int8_t> temporary_cpu_buffer;
-
-  // Get data buffer pointer
-  void* MutableData();
-
-  // Use this data to get the tensor data to process
-  // Since the most senario is process data in CPU
-  // this function weill return a pointer to cpu memory
-  // buffer.
-  // If the original data is on other device, the data
-  // will copy to cpu store in `temporary_cpu_buffer`
-  void* Data();
-
-  const void* Data() const;
-
-  // Set user memory buffer for Tensor, the memory is managed by
-  // the user it self, but the Tensor will share the memory with user
-  // So take care with the user buffer
-  void SetExternalData(const std::vector<int64_t>& new_shape,
-                       const FDDataType& data_type, void* data_buffer);
-
-  // Initialize Tensor
-  // Include setting attribute for tensor
-  // and allocate cpu memory buffer
-  void Allocate(const std::vector<int64_t>& new_shape,
-                const FDDataType& data_type,
-                const std::string& tensor_name = "");
-
-  // Total size of tensor memory buffer in bytes
-  int Nbytes() const;
-
-  // Total number of elements in this tensor
-  int Numel() const;
-
-  // Debug function
-  // Use this function to print shape, dtype, mean, max, min
-  // prefix will also be printed as tag
-  void PrintInfo(const std::string& prefix = "TensorInfo: ");
-
-  FDTensor() {}
-  explicit FDTensor(const std::string& tensor_name);
-};
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/core/fd_type.cc b/csrcs/fastdeploy/core/fd_type.cc
deleted file mode 100644
index ae70fa6e5..000000000
--- a/csrcs/fastdeploy/core/fd_type.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/core/fd_type.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-
-int FDDataTypeSize(const FDDataType& data_type) {
-  FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
-  if (data_type == FDDataType::BOOL) {
-    return sizeof(bool);
-  } else if (data_type == FDDataType::INT16) {
-    return sizeof(int16_t);
-  } else if (data_type == FDDataType::INT32) {
-    return sizeof(int32_t);
-  } else if (data_type == FDDataType::INT64) {
-    return sizeof(int64_t);
-  } else if (data_type == FDDataType::FP32) {
-    return sizeof(float);
-  } else if (data_type == FDDataType::FP64) {
-    return sizeof(double);
-  } else if (data_type == FDDataType::UINT8) {
-    return sizeof(uint8_t);
-  } else {
-    FDASSERT(false, "Unexpected data type: " + Str(data_type));
-  }
-  return -1;
-}
-
-std::string Str(const Device& d) {
-  std::string out;
-  switch (d) {
-    case Device::DEFAULT:
-      out = "Device::DEFAULT";
-      break;
-    case Device::CPU:
-      out = "Device::CPU";
-      break;
-    case Device::GPU:
-      out = "Device::GPU";
-      break;
-    default:
-      out = "Device::UNKOWN";
-  }
-  return out;
-}
-
-std::string Str(const FDDataType& fdt) {
-  std::string out;
-  switch (fdt) {
-    case FDDataType::BOOL:
-      out = "FDDataType::BOOL";
-      break;
-    case FDDataType::INT16:
-      out = "FDDataType::INT16";
-      break;
-    case FDDataType::INT32:
-      out = "FDDataType::INT32";
-      break;
-    case FDDataType::INT64:
-      out = "FDDataType::INT64";
-      break;
-    case FDDataType::FP32:
-      out = "FDDataType::FP32";
-      break;
-    case FDDataType::FP64:
-      out = "FDDataType::FP64";
-      break;
-    case FDDataType::FP16:
-      out = "FDDataType::FP16";
-      break;
-    case FDDataType::UINT8:
-      out = "FDDataType::UINT8";
-      break;
-    case FDDataType::INT8:
-      out = "FDDataType::INT8";
-      break;
-    default:
-      out = "FDDataType::UNKNOWN";
-  }
-  return out;
-}
-
-template <typename PlainType>
-const FDDataType TypeToDataType<PlainType>::dtype = UNKNOWN1;
-
-template <>
-const FDDataType TypeToDataType<bool>::dtype = BOOL;
-
-template <>
-const FDDataType TypeToDataType<int16_t>::dtype = INT16;
-
-template <>
-const FDDataType TypeToDataType<int32_t>::dtype = INT32;
-
-template <>
-const FDDataType TypeToDataType<int64_t>::dtype = INT64;
-
-template <>
-const FDDataType TypeToDataType<float>::dtype = FP32;
-
-template <>
-const FDDataType TypeToDataType<double>::dtype = FP64;
-
-template <>
-const FDDataType TypeToDataType<uint8_t>::dtype = UINT8;
-
-template <>
-const FDDataType TypeToDataType<int8_t>::dtype = INT8;
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/core/fd_type.h b/csrcs/fastdeploy/core/fd_type.h
deleted file mode 100644
index 50b00dca8..000000000
--- a/csrcs/fastdeploy/core/fd_type.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-
-#include <ostream>
-#include <sstream>
-#include <string>
-
-#include "fastdeploy/core/config.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-
-enum FASTDEPLOY_DECL Device { DEFAULT, CPU, GPU };
-
-FASTDEPLOY_DECL std::string Str(const Device& d);
-
-enum FASTDEPLOY_DECL FDDataType {
-  BOOL,
-  INT16,
-  INT32,
-  INT64,
-  FP16,
-  FP32,
-  FP64,
-  UNKNOWN1,
-  UNKNOWN2,
-  UNKNOWN3,
-  UNKNOWN4,
-  UNKNOWN5,
-  UNKNOWN6,
-  UNKNOWN7,
-  UNKNOWN8,
-  UNKNOWN9,
-  UNKNOWN10,
-  UNKNOWN11,
-  UNKNOWN12,
-  UNKNOWN13,
-  UINT8,
-  INT8
-};
-
-FASTDEPLOY_DECL std::string Str(const FDDataType& fdt);
-
-FASTDEPLOY_DECL int32_t FDDataTypeSize(const FDDataType& data_dtype);
-
-template <typename PlainType>
-struct FASTDEPLOY_DECL TypeToDataType {
-  static const FDDataType dtype;
-};
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/fastdeploy_model.cc b/csrcs/fastdeploy/fastdeploy_model.cc
deleted file mode 100644
index c4dbc70a7..000000000
--- a/csrcs/fastdeploy/fastdeploy_model.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/utils/unique_ptr.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-
-bool FastDeployModel::InitRuntime() {
-  FDASSERT(
-      CheckModelFormat(runtime_option.model_file, runtime_option.model_format),
-      "ModelFormatCheck Failed.");
-  if (runtime_initialized_) {
-    FDERROR << "The model is already initialized, cannot be initliazed again."
-            << std::endl;
-    return false;
-  }
-  if (runtime_option.backend != Backend::UNKNOWN) {
-    if (runtime_option.backend == Backend::ORT) {
-      if (!IsBackendAvailable(Backend::ORT)) {
-        FDERROR
-            << "Backend::ORT is not complied with current FastDeploy library."
-            << std::endl;
-        return false;
-      }
-    } else if (runtime_option.backend == Backend::TRT) {
-      if (!IsBackendAvailable(Backend::TRT)) {
-        FDERROR
-            << "Backend::TRT is not complied with current FastDeploy library."
-            << std::endl;
-        return false;
-      }
-    } else if (runtime_option.backend == Backend::PDINFER) {
-      if (!IsBackendAvailable(Backend::PDINFER)) {
-        FDERROR << "Backend::PDINFER is not compiled with current FastDeploy "
-                   "library."
-                << std::endl;
-        return false;
-      }
-    } else {
-      FDERROR
-          << "Only support Backend::ORT / Backend::TRT / Backend::PDINFER now."
-          << std::endl;
-      return false;
-    }
-    runtime_ = utils::make_unique<Runtime>();
-    if (!runtime_->Init(runtime_option)) {
-      return false;
-    }
-    runtime_initialized_ = true;
-    return true;
-  }
-
-  if (runtime_option.device == Device::CPU) {
-    return CreateCpuBackend();
-  } else if (runtime_option.device == Device::GPU) {
-#ifdef WITH_GPU
-    return CreateGpuBackend();
-#else
-    FDERROR << "The compiled FastDeploy library doesn't support GPU now."
-            << std::endl;
-    return false;
-#endif
-  }
-  FDERROR << "Only support CPU/GPU now." << std::endl;
-  return false;
-}
-
-bool FastDeployModel::CreateCpuBackend() {
-  if (valid_cpu_backends.size() == 0) {
-    FDERROR << "There's no valid cpu backends for model: " << ModelName()
-            << std::endl;
-    return false;
-  }
-
-  for (size_t i = 0; i < valid_cpu_backends.size(); ++i) {
-    if (!IsBackendAvailable(valid_cpu_backends[i])) {
-      continue;
-    }
-    runtime_option.backend = valid_cpu_backends[i];
-    runtime_ = std::unique_ptr<Runtime>(new Runtime());
-    if (!runtime_->Init(runtime_option)) {
-      return false;
-    }
-    runtime_initialized_ = true;
-    return true;
-  }
-  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
-  return false;
-}
-
-bool FastDeployModel::CreateGpuBackend() {
-  if (valid_gpu_backends.size() == 0) {
-    FDERROR << "There's no valid gpu backends for model: " << ModelName()
-            << std::endl;
-    return false;
-  }
-
-  for (size_t i = 0; i < valid_gpu_backends.size(); ++i) {
-    if (!IsBackendAvailable(valid_gpu_backends[i])) {
-      continue;
-    }
-    runtime_option.backend = valid_gpu_backends[i];
-    runtime_ = std::unique_ptr<Runtime>(new Runtime());
-    if (!runtime_->Init(runtime_option)) {
-      return false;
-    }
-    runtime_initialized_ = true;
-    return true;
-  }
-  FDERROR << "Cannot find an available gpu backend to load this model."
-          << std::endl;
-  return false;
-}
-
-bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
-                            std::vector<FDTensor>* output_tensors) {
-  return runtime_->Infer(input_tensors, output_tensors);
-}
-
-void FastDeployModel::EnableDebug() {
-#ifdef FASTDEPLOY_DEBUG
-  debug_ = true;
-#else
-  FDWARNING << "The compile FastDeploy is not with -DENABLE_DEBUG=ON, so "
-               "cannot enable debug mode."
-            << std::endl;
-  debug_ = false;
-#endif
-}
-
-bool FastDeployModel::DebugEnabled() { return debug_; }
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/fastdeploy_model.h b/csrcs/fastdeploy/fastdeploy_model.h
deleted file mode 100644
index df83ac525..000000000
--- a/csrcs/fastdeploy/fastdeploy_model.h
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include "fastdeploy/fastdeploy_runtime.h"
-
-namespace fastdeploy {
-
-class FASTDEPLOY_DECL FastDeployModel {
- public:
-  virtual std::string ModelName() const { return "NameUndefined"; }
-
-  virtual bool InitRuntime();
-  virtual bool CreateCpuBackend();
-  virtual bool CreateGpuBackend();
-  virtual bool Infer(std::vector<FDTensor>& input_tensors,
-                     std::vector<FDTensor>* output_tensors);
-
-  RuntimeOption runtime_option;
-  std::vector<Backend> valid_cpu_backends = {Backend::ORT};
-  std::vector<Backend> valid_gpu_backends = {Backend::ORT};
-  std::vector<Backend> valid_external_backends;
-  bool initialized = false;
-  virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
-  virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
-  virtual TensorInfo InputInfoOfRuntime(int index) {
-    return runtime_->GetInputInfo(index);
-  }
-  virtual TensorInfo OutputInfoOfRuntime(int index) {
-    return runtime_->GetOutputInfo(index);
-  }
-  virtual bool Initialized() const {
-    return runtime_initialized_ && initialized;
-  }
-
-  virtual void EnableDebug();
-  virtual bool DebugEnabled();
-
- private:
-  std::unique_ptr<Runtime> runtime_;
-  bool runtime_initialized_ = false;
-  bool debug_ = false;
-};
-
-#define TIMERECORD_START(id) \
-  TimeCounter tc_##id;       \
-  tc_##id.Start();
-
-#define TIMERECORD_END(id, prefix)                                           \
-  if (DebugEnabled()) {                                                      \
-    tc_##id.End();                                                           \
-    FDLogger() << __FILE__ << "(" << __LINE__ << "):" << __FUNCTION__ << " " \
-               << prefix << " duration = " << tc_##id.Duration() << "s."     \
-               << std::endl;                                                 \
-  }
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/fastdeploy_runtime.cc b/csrcs/fastdeploy/fastdeploy_runtime.cc
deleted file mode 100644
index e5c41a29a..000000000
--- a/csrcs/fastdeploy/fastdeploy_runtime.cc
+++ /dev/null
@@ -1,365 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/fastdeploy_runtime.h"
-#include "fastdeploy/utils/unique_ptr.h"
-#include "fastdeploy/utils/utils.h"
-
-#ifdef ENABLE_ORT_BACKEND
-#include "fastdeploy/backends/ort/ort_backend.h"
-#endif
-
-#ifdef ENABLE_TRT_BACKEND
-#include "fastdeploy/backends/tensorrt/trt_backend.h"
-#endif
-
-#ifdef ENABLE_PADDLE_BACKEND
-#include "fastdeploy/backends/paddle/paddle_backend.h"
-#endif
-
-namespace fastdeploy {
-
-std::vector<Backend> GetAvailableBackends() {
-  std::vector<Backend> backends;
-#ifdef ENABLE_ORT_BACKEND
-  backends.push_back(Backend::ORT);
-#endif
-#ifdef ENABLE_TRT_BACKEND
-  backends.push_back(Backend::TRT);
-#endif
-#ifdef ENABLE_PADDLE_BACKEND
-  backends.push_back(Backend::PDINFER);
-#endif
-  return backends;
-}
-
-bool IsBackendAvailable(const Backend& backend) {
-  std::vector<Backend> backends = GetAvailableBackends();
-  for (size_t i = 0; i < backends.size(); ++i) {
-    if (backend == backends[i]) {
-      return true;
-    }
-  }
-  return false;
-}
-
-std::string Str(const Backend& b) {
-  if (b == Backend::ORT) {
-    return "Backend::ORT";
-  } else if (b == Backend::TRT) {
-    return "Backend::TRT";
-  } else if (b == Backend::PDINFER) {
-    return "Backend::PDINFER";
-  }
-  return "UNKNOWN-Backend";
-}
-
-std::string Str(const Frontend& f) {
-  if (f == Frontend::PADDLE) {
-    return "Frontend::PADDLE";
-  } else if (f == Frontend::ONNX) {
-    return "Frontend::ONNX";
-  }
-  return "UNKNOWN-Frontend";
-}
-
-bool CheckModelFormat(const std::string& model_file,
-                      const Frontend& model_format) {
-  if (model_format == Frontend::PADDLE) {
-    if (model_file.size() < 8 ||
-        model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
-      FDERROR << "With model format of Frontend::PADDLE, the model file "
-                 "should ends with `.pdmodel`, but now it's "
-              << model_file << std::endl;
-      return false;
-    }
-  } else if (model_format == Frontend::ONNX) {
-    if (model_file.size() < 5 ||
-        model_file.substr(model_file.size() - 5, 5) != ".onnx") {
-      FDERROR << "With model format of Frontend::ONNX, the model file "
-                 "should ends with `.onnx`, but now it's "
-              << model_file << std::endl;
-      return false;
-    }
-  } else {
-    FDERROR << "Only support model format with frontend Frontend::PADDLE / "
-               "Frontend::ONNX."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-Frontend GuessModelFormat(const std::string& model_file) {
-  if (model_file.size() > 8 &&
-      model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
-    FDLogger() << "Model Format: PaddlePaddle." << std::endl;
-    return Frontend::PADDLE;
-  } else if (model_file.size() > 5 &&
-             model_file.substr(model_file.size() - 5, 5) == ".onnx") {
-    FDLogger() << "Model Format: ONNX." << std::endl;
-    return Frontend::ONNX;
-  }
-
-  FDERROR << "Cannot guess which model format you are using, please set "
-             "RuntimeOption::model_format manually."
-          << std::endl;
-  return Frontend::PADDLE;
-}
-
-void RuntimeOption::SetModelPath(const std::string& model_path,
-                                 const std::string& params_path,
-                                 const std::string& _model_format) {
-  if (_model_format == "paddle") {
-    model_file = model_path;
-    params_file = params_path;
-    model_format = Frontend::PADDLE;
-  } else if (_model_format == "onnx") {
-    model_file = model_path;
-    model_format = Frontend::ONNX;
-  } else {
-    FDASSERT(false, "The model format only can be 'paddle' or 'onnx'.");
-  }
-}
-
-void RuntimeOption::UseGpu(int gpu_id) {
-#ifdef WITH_GPU
-  device = Device::GPU;
-  device_id = gpu_id;
-#else
-  FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU."
-            << std::endl;
-  device = Device::CPU;
-#endif
-}
-
-void RuntimeOption::UseCpu() { device = Device::CPU; }
-
-void RuntimeOption::SetCpuThreadNum(int thread_num) {
-  FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
-  cpu_thread_num = thread_num;
-}
-
-// use paddle inference backend
-void RuntimeOption::UsePaddleBackend() {
-#ifdef ENABLE_PADDLE_BACKEND
-  backend = Backend::PDINFER;
-#else
-  FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference.");
-#endif
-}
-
-// use onnxruntime backend
-void RuntimeOption::UseOrtBackend() {
-#ifdef ENABLE_ORT_BACKEND
-  backend = Backend::ORT;
-#else
-  FDASSERT(false, "The FastDeploy didn't compile with OrtBackend.");
-#endif
-}
-
-void RuntimeOption::UseTrtBackend() {
-#ifdef ENABLE_TRT_BACKEND
-  backend = Backend::TRT;
-#else
-  FDASSERT(false, "The FastDeploy didn't compile with TrtBackend.");
-#endif
-}
-
-void RuntimeOption::EnablePaddleMKLDNN() { pd_enable_mkldnn = true; }
-
-void RuntimeOption::DisablePaddleMKLDNN() { pd_enable_mkldnn = false; }
-
-void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
-  FDASSERT(size > 0, "Parameter size must greater than 0.");
-  pd_mkldnn_cache_size = size;
-}
-
-void RuntimeOption::SetTrtInputShape(const std::string& input_name,
-                                     const std::vector<int32_t>& min_shape,
-                                     const std::vector<int32_t>& opt_shape,
-                                     const std::vector<int32_t>& max_shape) {
-  trt_min_shape[input_name].clear();
-  trt_max_shape[input_name].clear();
-  trt_opt_shape[input_name].clear();
-  trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
-  if (opt_shape.size() == 0) {
-    trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
-  } else {
-    trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
-  }
-  if (max_shape.size() == 0) {
-    trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
-  } else {
-    trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
-  }
-}
-
-void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
-
-void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
-
-void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
-  trt_serialize_file = cache_file_path;
-}
-
-bool Runtime::Init(const RuntimeOption& _option) {
-  option = _option;
-  if (option.model_format == Frontend::AUTOREC) {
-    option.model_format = GuessModelFormat(_option.model_file);
-  }
-  if (option.backend == Backend::UNKNOWN) {
-    if (IsBackendAvailable(Backend::ORT)) {
-      option.backend = Backend::ORT;
-    } else if (IsBackendAvailable(Backend::PDINFER)) {
-      option.backend = Backend::PDINFER;
-    } else {
-      FDERROR << "Please define backend in RuntimeOption, current it's "
-                 "Backend::UNKNOWN."
-              << std::endl;
-      return false;
-    }
-  }
-  if (option.backend == Backend::ORT) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::TRT only supports Device::CPU/Device::GPU.");
-    CreateOrtBackend();
-  } else if (option.backend == Backend::TRT) {
-    FDASSERT(option.device == Device::GPU,
-             "Backend::TRT only supports Device::GPU.");
-    CreateTrtBackend();
-  } else if (option.backend == Backend::PDINFER) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::TRT only supports Device::CPU/Device::GPU.");
-    FDASSERT(
-        option.model_format == Frontend::PADDLE,
-        "Backend::PDINFER only supports model format of Frontend::PADDLE.");
-    CreatePaddleBackend();
-  } else {
-    FDERROR << "Runtime only support "
-               "Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-TensorInfo Runtime::GetInputInfo(int index) {
-  return backend_->GetInputInfo(index);
-}
-
-TensorInfo Runtime::GetOutputInfo(int index) {
-  return backend_->GetOutputInfo(index);
-}
-
-bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
-                    std::vector<FDTensor>* output_tensors) {
-  return backend_->Infer(input_tensors, output_tensors);
-}
-
-void Runtime::CreatePaddleBackend() {
-#ifdef ENABLE_PADDLE_BACKEND
-  auto pd_option = PaddleBackendOption();
-  pd_option.enable_mkldnn = option.pd_enable_mkldnn;
-  pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
-  pd_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  pd_option.gpu_id = option.device_id;
-  pd_option.cpu_thread_num = option.cpu_thread_num;
-  FDASSERT(option.model_format == Frontend::PADDLE,
-           "PaddleBackend only support model format of Frontend::PADDLE.");
-  backend_ = utils::make_unique<PaddleBackend>();
-  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
-  FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
-                                          pd_option),
-           "Load model from Paddle failed while initliazing PaddleBackend.");
-#else
-  FDASSERT(false,
-           "PaddleBackend is not available, please compiled with "
-           "ENABLE_PADDLE_BACKEND=ON.");
-#endif
-}
-
-void Runtime::CreateOrtBackend() {
-#ifdef ENABLE_ORT_BACKEND
-  auto ort_option = OrtBackendOption();
-  ort_option.graph_optimization_level = option.ort_graph_opt_level;
-  ort_option.intra_op_num_threads = option.cpu_thread_num;
-  ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
-  ort_option.execution_mode = option.ort_execution_mode;
-  ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  ort_option.gpu_id = option.device_id;
-
-  // TODO(jiangjiajun): inside usage, maybe remove this later
-  ort_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
-  ort_option.custom_op_info_ = option.custom_op_info_;
-
-  FDASSERT(option.model_format == Frontend::PADDLE ||
-               option.model_format == Frontend::ONNX,
-           "OrtBackend only support model format of Frontend::PADDLE / "
-           "Frontend::ONNX.");
-  backend_ = utils::make_unique<OrtBackend>();
-  auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
-  if (option.model_format == Frontend::ONNX) {
-    FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
-             "Load model from ONNX failed while initliazing OrtBackend.");
-  } else {
-    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                            option.params_file, ort_option),
-             "Load model from Paddle failed while initliazing OrtBackend.");
-  }
-#else
-  FDASSERT(false,
-           "OrtBackend is not available, please compiled with "
-           "ENABLE_ORT_BACKEND=ON.");
-#endif
-}
-
-void Runtime::CreateTrtBackend() {
-#ifdef ENABLE_TRT_BACKEND
-  auto trt_option = TrtBackendOption();
-  trt_option.gpu_id = option.device_id;
-  trt_option.enable_fp16 = option.trt_enable_fp16;
-  trt_option.enable_int8 = option.trt_enable_int8;
-  trt_option.max_batch_size = option.trt_max_batch_size;
-  trt_option.max_workspace_size = option.trt_max_workspace_size;
-  trt_option.max_shape = option.trt_max_shape;
-  trt_option.min_shape = option.trt_min_shape;
-  trt_option.opt_shape = option.trt_opt_shape;
-  trt_option.serialize_file = option.trt_serialize_file;
-
-  // TODO(jiangjiajun): inside usage, maybe remove this later
-  trt_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
-  trt_option.custom_op_info_ = option.custom_op_info_;
-
-  FDASSERT(option.model_format == Frontend::PADDLE ||
-               option.model_format == Frontend::ONNX,
-           "TrtBackend only support model format of Frontend::PADDLE / "
-           "Frontend::ONNX.");
-  backend_ = utils::make_unique<TrtBackend>();
-  auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
-  if (option.model_format == Frontend::ONNX) {
-    FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
-             "Load model from ONNX failed while initliazing TrtBackend.");
-  } else {
-    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                            option.params_file, trt_option),
-             "Load model from Paddle failed while initliazing TrtBackend.");
-  }
-#else
-  FDASSERT(false,
-           "TrtBackend is not available, please compiled with "
-           "ENABLE_TRT_BACKEND=ON.");
-#endif
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/fastdeploy_runtime.h b/csrcs/fastdeploy/fastdeploy_runtime.h
deleted file mode 100644
index 780945458..000000000
--- a/csrcs/fastdeploy/fastdeploy_runtime.h
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-
-#include <map>
-#include <vector>
-
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/utils/perf.h"
-
-namespace fastdeploy {
-
-enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER };
-// AUTOREC will according to the name of model file
-// to decide which Frontend is
-enum FASTDEPLOY_DECL Frontend { AUTOREC, PADDLE, ONNX };
-
-FASTDEPLOY_DECL std::string Str(const Backend& b);
-FASTDEPLOY_DECL std::string Str(const Frontend& f);
-FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
-
-FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
-
-bool CheckModelFormat(const std::string& model_file,
-                      const Frontend& model_format);
-Frontend GuessModelFormat(const std::string& model_file);
-
-struct FASTDEPLOY_DECL RuntimeOption {
-  // set path of model file and params file
-  // for onnx, only need to define model_file, but also need to
-  // define model_format
-  // model_format support 'paddle' / 'onnx' now.
-  void SetModelPath(const std::string& model_path,
-                    const std::string& params_path = "",
-                    const std::string& _model_format = "paddle");
-
-  // set model inference in GPU
-  void UseCpu();
-
-  // set model inference in CPU
-  void UseGpu(int gpu_id = 0);
-
-  // set number of thread while inference in CPU
-  void SetCpuThreadNum(int thread_num);
-
-  // use paddle inference backend
-  void UsePaddleBackend();
-
-  // use onnxruntime backend
-  void UseOrtBackend();
-
-  // use tensorrt backend
-  void UseTrtBackend();
-
-  // enable mkldnn while use paddle inference in CPU
-  void EnablePaddleMKLDNN();
-  // disable mkldnn while use paddle inference in CPU
-  void DisablePaddleMKLDNN();
-
-  // set size of cached shape while enable mkldnn with paddle inference backend
-  void SetPaddleMKLDNNCacheSize(int size);
-
-  // set tensorrt shape while the inputs of model contain dynamic shape
-  // min_shape: the minimum shape
-  // opt_shape: the most common shape while inference, default be empty
-  // max_shape: the maximum shape, default be empty
-
-  // if opt_shape, max_shape are empty, they will keep same with the min_shape
-  // which means the shape will be fixed as min_shape while inference
-  void SetTrtInputShape(
-      const std::string& input_name, const std::vector<int32_t>& min_shape,
-      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
-      const std::vector<int32_t>& max_shape = std::vector<int32_t>());
-
-  // enable half precision while use tensorrt backend
-  void EnableTrtFP16();
-  // disable half precision, change to full precision(float32)
-  void DisableTrtFP16();
-
-  void SetTrtCacheFile(const std::string& cache_file_path);
-
-  Backend backend = Backend::UNKNOWN;
-  // for cpu inference and preprocess
-  int cpu_thread_num = 8;
-  int device_id = 0;
-
-  Device device = Device::CPU;
-
-  // ======Only for ORT Backend========
-  // -1 means use default value by ort
-  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
-  // ORT_ENABLE_ALL
-  int ort_graph_opt_level = -1;
-  int ort_inter_op_num_threads = -1;
-  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
-  int ort_execution_mode = -1;
-
-  // ======Only for Paddle Backend=====
-  bool pd_enable_mkldnn = true;
-  int pd_mkldnn_cache_size = 1;
-
-  // ======Only for Trt Backend=======
-  std::map<std::string, std::vector<int32_t>> trt_max_shape;
-  std::map<std::string, std::vector<int32_t>> trt_min_shape;
-  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
-  std::string trt_serialize_file = "";
-  bool trt_enable_fp16 = false;
-  bool trt_enable_int8 = false;
-  size_t trt_max_batch_size = 32;
-  size_t trt_max_workspace_size = 1 << 30;
-
-  std::string model_file = "";   // Path of model file
-  std::string params_file = "";  // Path of parameters file, can be empty
-  Frontend model_format = Frontend::AUTOREC;  // format of input model
-
-  // inside parameters, only for inside usage
-  // remove multiclass_nms in Paddle2ONNX
-  bool remove_multiclass_nms_ = false;
-  // for Paddle2ONNX to export custom operators
-  std::map<std::string, std::string> custom_op_info_;
-};
-
-struct FASTDEPLOY_DECL Runtime {
- public:
-  //  explicit Runtime(const RuntimeOption& _option = RuntimeOption());
-
-  bool Init(const RuntimeOption& _option);
-
-  bool Infer(std::vector<FDTensor>& input_tensors,
-             std::vector<FDTensor>* output_tensors);
-
-  void CreateOrtBackend();
-
-  void CreatePaddleBackend();
-
-  void CreateTrtBackend();
-
-  int NumInputs() { return backend_->NumInputs(); }
-  int NumOutputs() { return backend_->NumOutputs(); }
-  TensorInfo GetInputInfo(int index);
-  TensorInfo GetOutputInfo(int index);
-
-  RuntimeOption option;
-
- private:
-  std::unique_ptr<BaseBackend> backend_;
-};
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/function/eigen.cc b/csrcs/fastdeploy/function/eigen.cc
deleted file mode 100644
index adcfbb195..000000000
--- a/csrcs/fastdeploy/function/eigen.cc
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/function/eigen.h"
-
-namespace fastdeploy {
-
-std::shared_ptr<EigenDeviceWrapper> EigenDeviceWrapper::instance_ = nullptr;
-
-std::shared_ptr<EigenDeviceWrapper> EigenDeviceWrapper::GetInstance() {
-  if (instance_ == nullptr) {
-    instance_ = std::make_shared<EigenDeviceWrapper>();
-  }
-  return instance_;
-}
-
-const Eigen::DefaultDevice* EigenDeviceWrapper::GetDevice() const {
-  return &device_;
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/function/eigen.h b/csrcs/fastdeploy/function/eigen.h
deleted file mode 100644
index 32bacf064..000000000
--- a/csrcs/fastdeploy/function/eigen.h
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <algorithm>
-#include <memory>
-#include <vector>
-#include "fastdeploy/core/fd_tensor.h"
-#include "unsupported/Eigen/CXX11/Tensor"
-
-namespace fastdeploy {
-// EigenDim converts shape into Eigen::DSizes.
-template <int D>
-struct EigenDim {
-  using Type = Eigen::DSizes<Eigen::DenseIndex, D>;
-
-  static Type From(const std::vector<int64_t>& dims) {
-    Type ret;
-    for (int64_t d = 0; d < dims.size(); d++) {
-      ret[d] = dims[d];
-    }
-    return ret;
-  }
-};
-
-// Interpret FDTensor as EigenTensor and EigenConstTensor.
-template <typename T, size_t D, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-struct EigenTensor {
-  using Type = Eigen::TensorMap<Eigen::Tensor<T, D, MajorType, IndexType>>;
-
-  using ConstType =
-      Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;
-
-  static Type From(FDTensor& tensor,
-                   const std::vector<int64_t>& dims) {  // NOLINT
-    return Type(reinterpret_cast<T*>(tensor.Data()), EigenDim<D>::From(dims));
-  }
-
-  static Type From(FDTensor& tensor) {  // NOLINT
-    return From(tensor, tensor.shape);
-  }  // NOLINT
-
-  static ConstType From(const FDTensor& tensor,
-                        const std::vector<int64_t>& dims) {
-    return ConstType(reinterpret_cast<const T*>(tensor.Data()),
-                     EigenDim<D>::From(dims));
-  }
-
-  static ConstType From(const FDTensor& tensor) {
-    return From(tensor, tensor.shape);
-  }
-};
-
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-struct EigenScalar {
-  // Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
-  using Type = Eigen::TensorMap<
-      Eigen::TensorFixedSize<T, Eigen::Sizes<>, MajorType, IndexType>>;
-  using ConstType = Eigen::TensorMap<
-      Eigen::TensorFixedSize<const T, Eigen::Sizes<>, MajorType, IndexType>>;
-
-  static Type From(FDTensor& tensor) {
-    return Type(reinterpret_cast<T*>(tensor.Data()));
-  }  // NOLINT
-
-  static ConstType From(const FDTensor& tensor) {
-    return ConstType(reinterpret_cast<const T*>(tensor.Data()));
-  }
-};
-
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
-  // Flatten reshapes a Tensor into an EigenVector.
-  static typename EigenVector::Type Flatten(FDTensor& tensor) {  // NOLINT
-    return EigenVector::From(tensor, {tensor.Numel()});
-  }
-
-  static typename EigenVector::ConstType Flatten(
-      const FDTensor& tensor) {  // NOLINT
-    return EigenVector::From(tensor, {tensor.Numel()});
-  }
-};
-
-class EigenDeviceWrapper {
- public:
-  static std::shared_ptr<EigenDeviceWrapper> GetInstance();
-  const Eigen::DefaultDevice* GetDevice() const;
-
- private:
-  Eigen::DefaultDevice device_;
-  static std::shared_ptr<EigenDeviceWrapper> instance_;
-};
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/function/reduce.cc b/csrcs/fastdeploy/function/reduce.cc
deleted file mode 100644
index 897504e05..000000000
--- a/csrcs/fastdeploy/function/reduce.cc
+++ /dev/null
@@ -1,246 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <set>
-
-#include "fastdeploy/function/eigen.h"
-#include "fastdeploy/function/reduce.h"
-#include "fastdeploy/function/reduce_functor.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-
-#ifdef ENABLE_FDTENSOR_FUNC
-
-template <typename T, size_t D, size_t R_D, typename Functor>
-void ReduceFunctor(const FDTensor& input, FDTensor* output,
-                   const std::vector<int64_t>& dims, bool keep_dim) {
-  auto x = EigenTensor<T, D>::From(input);
-  auto x_rank = static_cast<int>(x.dimensions().size());
-  auto reduce_dim = Eigen::array<int, R_D>();
-  std::vector<int64_t> dims_ref = dims;
-
-  auto out_dims = input.shape;
-  for (size_t i = 0; i < dims_ref.size(); ++i) {
-    if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i];
-    reduce_dim[i] = dims_ref[i];
-    out_dims[dims_ref[i]] = 1;
-  }
-  auto origin_output_dims = out_dims;
-  output->Allocate(origin_output_dims, TypeToDataType<T>::dtype);
-  // construct the squeezed output tensor
-  if (x_rank > 1) {
-    const int kDelFlag = -2;
-    for (size_t i = 0; i < dims_ref.size(); ++i) {
-      out_dims[dims_ref[i]] = kDelFlag;
-    }
-    out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag),
-                   out_dims.end());
-  }
-
-  auto& place = *EigenDeviceWrapper::GetInstance()->GetDevice();
-  Functor functor;
-  if (D == 1) {
-    auto out = EigenScalar<T>::From(*output);
-    functor(place, &x, &out, reduce_dim);
-  } else {
-    auto out = EigenTensor<T, (D - R_D)>::From(*output, out_dims);
-    functor(place, &x, &out, reduce_dim);
-    if (!keep_dim) {
-      output->shape = std::move(out_dims);
-    }
-  }
-}
-
-#define HANDLE_REDUCE_DIM(NDIM, RDIM)                                        \
-  if (ndim == NDIM && rdim == RDIM) {                                        \
-    ReduceFunctor<OutT, NDIM, RDIM, Functor>(input, output, dims, keep_dim); \
-  }
-
-inline void GetShuffledDim(const std::vector<int64_t>& src_dims,
-                           std::vector<int64_t>* dst_dims,
-                           const std::vector<int64_t>& reduced_dims,
-                           std::vector<int>* perm_axis) {
-  // check if it's a reduced dim
-  std::vector<bool> src_dims_check(src_dims.size(), false);
-  size_t src_size = src_dims.size();
-  size_t reduce_size = reduced_dims.size();
-  std::vector<int64_t> regular_reduced_dims = reduced_dims;
-  for (size_t i = 0; i < regular_reduced_dims.size(); i++) {
-    if (regular_reduced_dims[i] < 0) {
-      regular_reduced_dims[i] = src_size + regular_reduced_dims[i];
-    }
-  }
-
-  for (size_t i = 0; i < reduce_size; ++i) {
-    dst_dims->at(src_size - reduce_size + i) =
-        src_dims[regular_reduced_dims[i]];
-    (*perm_axis)[src_size - reduce_size + i] = regular_reduced_dims[i];
-    src_dims_check[regular_reduced_dims[i]] = true;
-  }
-
-  size_t offset = 0;
-  for (size_t i = 0; i < src_dims_check.size(); ++i) {
-    bool is_reduced = src_dims_check[i];
-    if (!is_reduced) {
-      (*perm_axis)[offset] = i;
-      dst_dims->at(offset++) = src_dims[i];
-    }
-  }
-}
-
-template <typename OutT>
-void GetShuffledInput(const FDTensor& input, FDTensor* shuffled_input,
-                      const std::vector<int64_t>& dims) {
-  auto shuffled_dims = input.shape;
-  std::vector<int> perm_axis(input.shape.size());
-  GetShuffledDim(input.shape, &shuffled_dims, dims, &perm_axis);
-
-  shuffled_input->Allocate(shuffled_dims, input.dtype);
-  // TODO(zhoushunjie) : Need to implement trans function
-  // phi::funcs::TransposeNormal<DeviceContext, OutT> trans;
-  // trans(dev_ctx, input, shuffled_input, perm_axis);
-}
-
-//////////////// HandleLargeDim
-template <typename OutT, typename Functor>
-void HandleLargeDim(const FDTensor& input, FDTensor* output,
-                    const std::vector<int64_t>& dims, bool keep_dim) {
-  //  shuffle the reduced dim to the end
-  FDTensor shuffled_input;
-  GetShuffledInput<OutT>(input, &shuffled_input, dims);
-
-  // transpose to 2D tensor whose shape is {unreduced, reduced}.
-  const int64_t unreduced = output->Numel();
-  const int64_t reduced = shuffled_input.Numel() / unreduced;
-  shuffled_input.Allocate({unreduced, reduced}, TypeToDataType<OutT>::dtype);
-
-  auto output_dim = output->shape;
-  output->Allocate({unreduced}, TypeToDataType<OutT>::dtype);
-
-  ReduceFunctor<OutT, 2, 1, Functor>(shuffled_input, output, {1}, keep_dim);
-  output->shape = output_dim;
-}
-
-////////////// ReduceKernel
-
-template <typename OutT, typename Functor>
-void ReduceKernelImpl(const FDTensor& input, FDTensor* output,
-                      const std::vector<int64_t>& dims, bool keep_dim,
-                      bool reduce_all) {
-  output->Allocate({1}, TypeToDataType<OutT>::dtype);
-  const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
-  if (reduce_all) {
-    // Flatten and reduce 1-D tensor
-    auto x = EigenVector<OutT>::Flatten(input);
-    auto out = EigenScalar<OutT>::From(*output);
-    auto reduce_dim = Eigen::array<int, 1>({{0}});
-
-    Functor functor;
-    functor(dev, &x, &out, reduce_dim);
-  } else {
-    int ndim = input.shape.size();
-    int rdim = dims.size();
-    if (ndim > 3) {
-      HandleLargeDim<OutT, Functor>(input, output, dims, keep_dim);
-    } else {
-      HANDLE_REDUCE_DIM(4, 3);
-      HANDLE_REDUCE_DIM(4, 2);
-      HANDLE_REDUCE_DIM(4, 1);
-      HANDLE_REDUCE_DIM(3, 2);
-      HANDLE_REDUCE_DIM(3, 1);
-      HANDLE_REDUCE_DIM(2, 1);
-      HANDLE_REDUCE_DIM(1, 1);
-    }
-  }
-}
-
-template <typename OutT, typename Functor>
-void BoolReduceKernel(const FDTensor& input, FDTensor* output,
-                      const std::vector<int64_t>& dims, bool keep_dim,
-                      bool reduce_all) {
-  // The dims has full dim, set the reduce_all is True
-  const auto& input_dim_size = input.shape.size();
-  std::set<int> dims_set(dims.begin(), dims.end());
-  bool full_dim = true;
-  for (auto i = 0; i < input_dim_size; i++) {
-    if (dims_set.find(i) == dims_set.end()) {
-      full_dim = false;
-      break;
-    }
-  }
-  reduce_all = (reduce_all || full_dim);
-
-  ReduceKernelImpl<bool, Functor>(input, output, dims, keep_dim, reduce_all);
-}
-
-template <typename Functor>
-void Reduce(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-            bool keep_dim, bool reduce_all) {
-  // If the dims has full dim, set the reduce_all is True
-  const int& input_dim_size = x.shape.size();
-  std::set<int> dims_set(dims.begin(), dims.end());
-  bool full_dim = true;
-  for (int i = 0; i < input_dim_size; ++i) {
-    if (dims_set.find(i) == dims_set.end() &&
-        dims_set.find(i - input_dim_size) == dims_set.end()) {
-      full_dim = false;
-      break;
-    }
-  }
-  reduce_all = (reduce_all || full_dim);
-
-  FD_VISIT_ALL_TYPES(x.dtype, "ReduceKernelImpl", ([&] {
-                       ReduceKernelImpl<data_t, Functor>(x, out, dims, keep_dim,
-                                                         reduce_all);
-                     }));
-}
-
-void Max(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-         bool keep_dim, bool reduce_all) {
-  Reduce<MaxFunctor>(x, out, dims, keep_dim, reduce_all);
-}
-
-void Min(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-         bool keep_dim, bool reduce_all) {
-  Reduce<MinFunctor>(x, out, dims, keep_dim, reduce_all);
-}
-
-void Sum(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-         bool keep_dim, bool reduce_all) {
-  Reduce<SumFunctor>(x, out, dims, keep_dim, reduce_all);
-}
-
-void All(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-         bool keep_dim, bool reduce_all) {
-  BoolReduceKernel<bool, AllFunctor>(x, out, dims, keep_dim, reduce_all);
-}
-
-void Any(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-         bool keep_dim, bool reduce_all) {
-  BoolReduceKernel<bool, AnyFunctor>(x, out, dims, keep_dim, reduce_all);
-}
-
-void Mean(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-          bool keep_dim, bool reduce_all) {
-  Reduce<MeanFunctor>(x, out, dims, keep_dim, reduce_all);
-}
-
-void Prod(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
-          bool keep_dim, bool reduce_all) {
-  Reduce<ProdFunctor>(x, out, dims, keep_dim, reduce_all);
-}
-#endif
-
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/function/reduce.h b/csrcs/fastdeploy/function/reduce.h
deleted file mode 100644
index af8810c6b..000000000
--- a/csrcs/fastdeploy/function/reduce.h
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/core/fd_tensor.h"
-
-namespace fastdeploy {
-
-#ifdef ENABLE_FDTENSOR_FUNC
-/** Excute the maximum operation for input FDTensor along given dims.
-    @param x The input tensor.
-    @param out The output tensor which stores the result.
-    @param dims The vector of axis which will be reduced.
-    @param keep_dim Whether to keep the reduced dims, default false.
-    @param reduce_all Whether to reduce all dims, default false.
-*/
-FASTDEPLOY_DECL void Max(const FDTensor& x, FDTensor* out,
-                         const std::vector<int64_t>& dims,
-                         bool keep_dim = false, bool reduce_all = false);
-
-/** Excute the minimum operation for input FDTensor along given dims.
-    @param x The input tensor.
-    @param out The output tensor which stores the result.
-    @param dims The vector of axis which will be reduced.
-    @param keep_dim Whether to keep the reduced dims, default false.
-    @param reduce_all Whether to reduce all dims, default false.
-*/
-FASTDEPLOY_DECL void Min(const FDTensor& x, FDTensor* out,
-                         const std::vector<int64_t>& dims,
-                         bool keep_dim = false, bool reduce_all = false);
-
-/** Excute the sum operation for input FDTensor along given dims.
-    @param x The input tensor.
-    @param out The output tensor which stores the result.
-    @param dims The vector of axis which will be reduced.
-    @param keep_dim Whether to keep the reduced dims, default false.
-    @param reduce_all Whether to reduce all dims, default false.
-*/
-FASTDEPLOY_DECL void Sum(const FDTensor& x, FDTensor* out,
-                         const std::vector<int64_t>& dims,
-                         bool keep_dim = false, bool reduce_all = false);
-
-/** Excute the all operation for input FDTensor along given dims.
-    @param x The input tensor.
-    @param out The output tensor which stores the result.
-    @param dims The vector of axis which will be reduced.
-    @param keep_dim Whether to keep the reduced dims, default false.
-    @param reduce_all Whether to reduce all dims, default false.
-*/
-FASTDEPLOY_DECL void All(const FDTensor& x, FDTensor* out,
-                         const std::vector<int64_t>& dims,
-                         bool keep_dim = false, bool reduce_all = false);
-
-/** Excute the any operation for input FDTensor along given dims.
-    @param x The input tensor.
-    @param out The output tensor which stores the result.
-    @param dims The vector of axis which will be reduced.
-    @param keep_dim Whether to keep the reduced dims, default false.
-    @param reduce_all Whether to reduce all dims, default false.
-*/
-FASTDEPLOY_DECL void Any(const FDTensor& x, FDTensor* out,
-                         const std::vector<int64_t>& dims,
-                         bool keep_dim = false, bool reduce_all = false);
-
-/** Excute the mean operation for input FDTensor along given dims.
-    @param x The input tensor.
-    @param out The output tensor which stores the result.
-    @param dims The vector of axis which will be reduced.
-    @param keep_dim Whether to keep the reduced dims, default false.
-    @param reduce_all Whether to reduce all dims, default false.
-*/
-FASTDEPLOY_DECL void Mean(const FDTensor& x, FDTensor* out,
-                          const std::vector<int64_t>& dims,
-                          bool keep_dim = false, bool reduce_all = false);
-
-/** Excute the product operation for input FDTensor along given dims.
-    @param x The input tensor.
-    @param out The output tensor which stores the result.
-    @param dims The vector of axis which will be reduced.
-    @param keep_dim Whether to keep the reduced dims, default false.
-    @param reduce_all Whether to reduce all dims, default false.
-*/
-FASTDEPLOY_DECL void Prod(const FDTensor& x, FDTensor* out,
-                          const std::vector<int64_t>& dims,
-                          bool keep_dim = false, bool reduce_all = false);
-
-#endif
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/function/reduce_functor.h b/csrcs/fastdeploy/function/reduce_functor.h
deleted file mode 100644
index de0c45bb3..000000000
--- a/csrcs/fastdeploy/function/reduce_functor.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/function/eigen.h"
-namespace fastdeploy {
-
-//////// Max Functor ///////
-struct MaxFunctor {
-  template <typename X, typename Y, typename Dim>
-  void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
-    y->device(dev) = x->maximum(dim);
-  }
-};
-
-//////// Min Functor ///////
-struct MinFunctor {
-  template <typename X, typename Y, typename Dim>
-  void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
-    y->device(dev) = x->minimum(dim);
-  }
-};
-
-//////// Sum Functor ///////
-struct SumFunctor {
-  template <typename X, typename Y, typename Dim>
-  void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
-    y->device(dev) = x->sum(dim);
-  }
-};
-
-//////// All Functor ///////
-struct AllFunctor {
-  template <typename X, typename Y, typename Dim>
-  void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
-    y->device(dev) = x->all(dim);
-  }
-};
-
-//////// Any Functor ///////
-struct AnyFunctor {
-  template <typename X, typename Y, typename Dim>
-  void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
-    y->device(dev) = x->any(dim);
-  }
-};
-
-//////// Mean Functor ///////
-struct MeanFunctor {
-  template <typename X, typename Y, typename Dim>
-  void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
-    y->device(dev) = x->mean(dim);
-  }
-};
-
-//////// Prod Functor ///////
-struct ProdFunctor {
-  template <typename X, typename Y, typename Dim>
-  void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
-    y->device(dev) = x->prod(dim);
-  }
-};
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/pybind/fastdeploy_model.cc b/csrcs/fastdeploy/pybind/fastdeploy_model.cc
deleted file mode 100644
index b59c0fd0f..000000000
--- a/csrcs/fastdeploy/pybind/fastdeploy_model.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-#include "fastdeploy/fastdeploy_model.h"
-
-namespace fastdeploy {
-
-void BindFDModel(pybind11::module& m) {
-  pybind11::class_<FastDeployModel>(m, "FastDeployModel")
-      .def(pybind11::init<>(), "Default Constructor")
-      .def("model_name", &FastDeployModel::ModelName)
-      .def("num_inputs_of_runtime", &FastDeployModel::NumInputsOfRuntime)
-      .def("num_outputs_of_runtime", &FastDeployModel::NumOutputsOfRuntime)
-      .def("input_info_of_runtime", &FastDeployModel::InputInfoOfRuntime)
-      .def("output_info_of_runtime", &FastDeployModel::OutputInfoOfRuntime)
-      .def("initialized", &FastDeployModel::Initialized)
-      .def_readwrite("runtime_option", &FastDeployModel::runtime_option)
-      .def_readwrite("valid_cpu_backends", &FastDeployModel::valid_cpu_backends)
-      .def_readwrite("valid_gpu_backends",
-                     &FastDeployModel::valid_gpu_backends);
-}
-
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/pybind/fastdeploy_runtime.cc b/csrcs/fastdeploy/pybind/fastdeploy_runtime.cc
deleted file mode 100644
index 412b1ccef..000000000
--- a/csrcs/fastdeploy/pybind/fastdeploy_runtime.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-
-void BindRuntime(pybind11::module& m) {
-  pybind11::class_<RuntimeOption>(m, "RuntimeOption")
-      .def(pybind11::init())
-      .def("set_model_path", &RuntimeOption::SetModelPath)
-      .def("use_gpu", &RuntimeOption::UseGpu)
-      .def("use_cpu", &RuntimeOption::UseCpu)
-      .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
-      .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
-      .def("use_ort_backend", &RuntimeOption::UseOrtBackend)
-      .def("use_trt_backend", &RuntimeOption::UseTrtBackend)
-      .def("enable_paddle_mkldnn", &RuntimeOption::EnablePaddleMKLDNN)
-      .def("disable_paddle_mkldnn", &RuntimeOption::DisablePaddleMKLDNN)
-      .def("set_paddle_mkldnn_cache_size",
-           &RuntimeOption::SetPaddleMKLDNNCacheSize)
-      .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
-      .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
-      .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
-      .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
-      .def_readwrite("model_file", &RuntimeOption::model_file)
-      .def_readwrite("params_file", &RuntimeOption::params_file)
-      .def_readwrite("model_format", &RuntimeOption::model_format)
-      .def_readwrite("backend", &RuntimeOption::backend)
-      .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
-      .def_readwrite("device_id", &RuntimeOption::device_id)
-      .def_readwrite("device", &RuntimeOption::device)
-      .def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
-      .def_readwrite("ort_inter_op_num_threads",
-                     &RuntimeOption::ort_inter_op_num_threads)
-      .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
-      .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
-      .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
-      .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
-      .def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
-      .def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
-      .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
-      .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
-      .def_readwrite("trt_max_workspace_size",
-                     &RuntimeOption::trt_max_workspace_size);
-
-  pybind11::class_<TensorInfo>(m, "TensorInfo")
-      .def_readwrite("name", &TensorInfo::name)
-      .def_readwrite("shape", &TensorInfo::shape)
-      .def_readwrite("dtype", &TensorInfo::dtype);
-
-  pybind11::class_<Runtime>(m, "Runtime")
-      .def(pybind11::init())
-      .def("init", &Runtime::Init)
-      .def("infer",
-           [](Runtime& self, std::map<std::string, pybind11::array>& data) {
-             std::vector<FDTensor> inputs(data.size());
-             int index = 0;
-             for (auto iter = data.begin(); iter != data.end(); ++iter) {
-               inputs[index].dtype =
-                   NumpyDataTypeToFDDataType(iter->second.dtype());
-               inputs[index].shape.insert(
-                   inputs[index].shape.begin(), iter->second.shape(),
-                   iter->second.shape() + iter->second.ndim());
-               // TODO(jiangjiajun) Maybe skip memory copy is a better choice
-               // use SetExternalData
-               inputs[index].data.resize(iter->second.nbytes());
-               memcpy(inputs[index].data.data(), iter->second.mutable_data(),
-                      iter->second.nbytes());
-               inputs[index].name = iter->first;
-               index += 1;
-             }
-
-             std::vector<FDTensor> outputs(self.NumOutputs());
-             self.Infer(inputs, &outputs);
-
-             std::vector<pybind11::array> results;
-             results.reserve(outputs.size());
-             for (size_t i = 0; i < outputs.size(); ++i) {
-               auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
-               results.emplace_back(
-                   pybind11::array(numpy_dtype, outputs[i].shape));
-               memcpy(results[i].mutable_data(), outputs[i].data.data(),
-                      outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
-             }
-             return results;
-           })
-      .def("num_inputs", &Runtime::NumInputs)
-      .def("num_outputs", &Runtime::NumOutputs)
-      .def("get_input_info", &Runtime::GetInputInfo)
-      .def("get_output_info", &Runtime::GetOutputInfo)
-      .def_readonly("option", &Runtime::option);
-
-  pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
-                           "Backend for inference.")
-      .value("UNKOWN", Backend::UNKNOWN)
-      .value("ORT", Backend::ORT)
-      .value("TRT", Backend::TRT)
-      .value("PDINFER", Backend::PDINFER);
-  pybind11::enum_<Frontend>(m, "Frontend", pybind11::arithmetic(),
-                            "Frontend for inference.")
-      .value("PADDLE", Frontend::PADDLE)
-      .value("ONNX", Frontend::ONNX);
-  pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
-                          "Device for inference.")
-      .value("CPU", Device::CPU)
-      .value("GPU", Device::GPU);
-
-  pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
-                              "Data type of FastDeploy.")
-      .value("BOOL", FDDataType::BOOL)
-      .value("INT8", FDDataType::INT8)
-      .value("INT16", FDDataType::INT16)
-      .value("INT32", FDDataType::INT32)
-      .value("INT64", FDDataType::INT64)
-      .value("FP32", FDDataType::FP32)
-      .value("FP64", FDDataType::FP64)
-      .value("UINT8", FDDataType::UINT8);
-
-  m.def("get_available_backends", []() { return GetAvailableBackends(); });
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/pybind/main.cc.in b/csrcs/fastdeploy/pybind/main.cc.in
deleted file mode 100644
index 13e0a31c4..000000000
--- a/csrcs/fastdeploy/pybind/main.cc.in
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-
-void BindRuntime(pybind11::module&);
-void BindFDModel(pybind11::module&);
-void BindVision(pybind11::module&);
-
-pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
-  pybind11::dtype dt;
-  if (fd_dtype == FDDataType::INT32) {
-    dt = pybind11::dtype::of<int32_t>();
-  } else if (fd_dtype == FDDataType::INT64) {
-    dt = pybind11::dtype::of<int64_t>();
-  } else if (fd_dtype == FDDataType::FP32) {
-    dt = pybind11::dtype::of<float>();
-  } else if (fd_dtype == FDDataType::FP64) {
-    dt = pybind11::dtype::of<double>();
-  } else if (fd_dtype == FDDataType::UINT8) {
-    dt = pybind11::dtype::of<uint8_t>();
-  } else {
-    FDASSERT(false, "The function doesn't support data type of " +
-                        Str(fd_dtype) + ".");
-  }
-  return dt;
-}
-
-FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) {
-  if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
-    return FDDataType::INT32;
-  } else if (np_dtype.is(pybind11::dtype::of<int64_t>())) {
-    return FDDataType::INT64;
-  } else if (np_dtype.is(pybind11::dtype::of<float>())) {
-    return FDDataType::FP32;
-  } else if (np_dtype.is(pybind11::dtype::of<double>())) {
-    return FDDataType::FP64;
-  } else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
-    return FDDataType::UINT8;
-  }
-  FDASSERT(false,
-           "NumpyDataTypeToFDDataType() only support "
-           "int32/int64/float32/float64 now.");
-  return FDDataType::FP32;
-}
-
-void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
-                     bool share_buffer) {
-  tensor->dtype = NumpyDataTypeToFDDataType(pyarray.dtype());
-  tensor->shape.insert(tensor->shape.begin(), pyarray.shape(),
-                       pyarray.shape() + pyarray.ndim());
-  if (share_buffer) {
-    tensor->external_data_ptr = pyarray.mutable_data();
-  } else {
-    tensor->data.resize(pyarray.nbytes());
-    memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
-  }
-}
-
-pybind11::array TensorToPyArray(const FDTensor& tensor) {
-  auto numpy_dtype = FDDataTypeToNumpyDataType(tensor.dtype);
-  auto out = pybind11::array(numpy_dtype, tensor.shape);
-  memcpy(out.mutable_data(), tensor.Data(), tensor.Numel() * FDDataTypeSize(tensor.dtype));
-  return out;
-}
-
-#ifdef ENABLE_VISION
-int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) {
-  if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
-    return CV_32S;
-  } else if (np_dtype.is(pybind11::dtype::of<int8_t>())) {
-    return CV_8U;
-  } else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
-    return CV_8U;
-  } else if (np_dtype.is(pybind11::dtype::of<float>())) {
-    return CV_32F;
-  } else {
-    FDASSERT(
-        false,
-        "NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 "
-        "now.");
-  }
-  return CV_8U;
-}
-
-cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
-  auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
-  FDASSERT(
-      pyarray.ndim() == 3,
-      "Require rank of array to be 3 with HWC format while converting it to "
-      "cv::Mat.");
-  int channel = *(pyarray.shape() + 2);
-  int height = *(pyarray.shape());
-  int width = *(pyarray.shape() + 1);
-  return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
-                 pyarray.mutable_data());
-}
-#endif
-
-PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
-  m.doc() =
-      "Make programer easier to deploy deeplearning model, save time to save "
-      "the world!";
-
-  BindRuntime(m);
-  BindFDModel(m);
-#ifdef ENABLE_VISION
-  auto vision_module =
-      m.def_submodule("vision", "Vision module of FastDeploy.");
-  BindVision(vision_module);
-#endif
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/pybind/main.h b/csrcs/fastdeploy/pybind/main.h
deleted file mode 100644
index 23f0eccc2..000000000
--- a/csrcs/fastdeploy/pybind/main.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <pybind11/numpy.h>
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <type_traits>
-
-#include "fastdeploy/fastdeploy_runtime.h"
-
-#ifdef ENABLE_VISION
-#include "fastdeploy/vision.h"
-#endif
-
-namespace fastdeploy {
-
-void BindBackend(pybind11::module&);
-void BindVision(pybind11::module&);
-
-pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);
-
-FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype);
-
-void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
-                     bool share_buffer = false);
-pybind11::array TensorToPyArray(const FDTensor& tensor);
-
-#ifdef ENABLE_VISION
-cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
-#endif
-
-template <typename T>
-FDDataType CTypeToFDDataType() {
-  if (std::is_same<T, int32_t>::value) {
-    return FDDataType::INT32;
-  } else if (std::is_same<T, int64_t>::value) {
-    return FDDataType::INT64;
-  } else if (std::is_same<T, float>::value) {
-    return FDDataType::FP32;
-  } else if (std::is_same<T, double>::value) {
-    return FDDataType::FP64;
-  }
-  FDASSERT(false,
-           "CTypeToFDDataType only support int32/int64/float32/float64 now.");
-  return FDDataType::FP32;
-}
-
-template <typename T>
-std::vector<pybind11::array> PyBackendInfer(
-    T& self, const std::vector<std::string>& names,
-    std::vector<pybind11::array>& data) {
-  std::vector<FDTensor> inputs(data.size());
-  for (size_t i = 0; i < data.size(); ++i) {
-    // TODO(jiangjiajun) here is considered to use user memory directly
-    inputs[i].dtype = NumpyDataTypeToFDDataType(data[i].dtype());
-    inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
-                           data[i].shape() + data[i].ndim());
-    inputs[i].data.resize(data[i].nbytes());
-    memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
-    inputs[i].name = names[i];
-  }
-
-  std::vector<FDTensor> outputs(self.NumOutputs());
-  self.Infer(inputs, &outputs);
-
-  std::vector<pybind11::array> results;
-  results.reserve(outputs.size());
-  for (size_t i = 0; i < outputs.size(); ++i) {
-    auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
-    results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
-    memcpy(results[i].mutable_data(), outputs[i].data.data(),
-           outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
-  }
-  return results;
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/text.h b/csrcs/fastdeploy/text.h
deleted file mode 100644
index 184f0f4f9..000000000
--- a/csrcs/fastdeploy/text.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-
-#include "fastdeploy/core/config.h"
-#ifdef ENABLE_TEXT
-#include "fastdeploy/text/text_model.h"
-#endif
diff --git a/csrcs/fastdeploy/text/common/option.h b/csrcs/fastdeploy/text/common/option.h
deleted file mode 100644
index a795fd066..000000000
--- a/csrcs/fastdeploy/text/common/option.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-namespace text {
-
-struct FASTDEPLOY_DECL TextPreprocessOption {};
-struct FASTDEPLOY_DECL TextPostprocessOption {};
-struct FASTDEPLOY_DECL PredictionOption {};
-
-}  // namespace text
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/text/common/result.cc b/csrcs/fastdeploy/text/common/result.cc
deleted file mode 100644
index cb7efbb73..000000000
--- a/csrcs/fastdeploy/text/common/result.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "fastdeploy/text/common/result.h"
-
-namespace fastdeploy {
-namespace text {}  // namespace text
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/text/common/result.h b/csrcs/fastdeploy/text/common/result.h
deleted file mode 100644
index 4a6f716a3..000000000
--- a/csrcs/fastdeploy/text/common/result.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-namespace text {
-
-struct FASTDEPLOY_DECL Result {};
-
-}  // namespace text
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.cc b/csrcs/fastdeploy/text/postprocessor/postprocessor.cc
deleted file mode 100644
index e8f717743..000000000
--- a/csrcs/fastdeploy/text/postprocessor/postprocessor.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/text/postprocessor/postprocessor.h"
-
-namespace fastdeploy {
-namespace text {
-
-bool Postprocessor::Decode(const std::vector<FDTensor>& model_result,
-                           Result* decoded_result) const {
-  return true;
-}
-
-bool Postprocessor::DecodeBatch(const std::vector<FDTensor>& model_result,
-                                Result* decoded_result) const {
-  return true;
-}
-
-}  // namespace text
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.h b/csrcs/fastdeploy/text/postprocessor/postprocessor.h
deleted file mode 100644
index 76f6a7090..000000000
--- a/csrcs/fastdeploy/text/postprocessor/postprocessor.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <vector>
-#include "fastdeploy/core/fd_tensor.h"
-#include "fastdeploy/text/common/result.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-namespace text {
-
-class Postprocessor {
- public:
-  virtual bool Decode(const std::vector<FDTensor>& model_result,
-                      Result* decoded_result) const;
-  virtual bool DecodeBatch(const std::vector<FDTensor>& model_result,
-                           Result* decoded_result) const;
-};
-
-}  // namespace text
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.cc b/csrcs/fastdeploy/text/preprocessor/preprocessor.cc
deleted file mode 100644
index 2e2715f61..000000000
--- a/csrcs/fastdeploy/text/preprocessor/preprocessor.cc
+++ /dev/null
@@ -1,32 +0,0 @@
-
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/text/preprocessor/preprocessor.h"
-
-namespace fastdeploy {
-namespace text {
-
-bool Preprocessor::Encode(const std::string& raw_text,
-                          std::vector<FDTensor>* encoded_tensor) const {
-  return true;
-}
-
-bool Preprocessor::EncodeBatch(const std::vector<std::string>& raw_texts,
-                               std::vector<FDTensor>* encoded_tensor) const {
-  return true;
-}
-
-}  // namespace text
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.h b/csrcs/fastdeploy/text/preprocessor/preprocessor.h
deleted file mode 100644
index 799967093..000000000
--- a/csrcs/fastdeploy/text/preprocessor/preprocessor.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <memory>
-#include <vector>
-#include "fastdeploy/core/fd_tensor.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-namespace text {
-
-class Preprocessor {
- public:
-  virtual bool Encode(const std::string& raw_text,
-                      std::vector<FDTensor>* encoded_tensor) const;
-  virtual bool EncodeBatch(const std::vector<std::string>& raw_texts,
-                           std::vector<FDTensor>* encoded_tensor) const;
-};
-
-}  // namespace text
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/text/text_model.cc b/csrcs/fastdeploy/text/text_model.cc
deleted file mode 100644
index d5a40c0e5..000000000
--- a/csrcs/fastdeploy/text/text_model.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/text/text_model.h"
-#include "fastdeploy/text/common/option.h"
-#include "fastdeploy/text/common/result.h"
-#include "fastdeploy/text/postprocessor/postprocessor.h"
-#include "fastdeploy/text/preprocessor/preprocessor.h"
-
-namespace fastdeploy {
-namespace text {
-
-bool TextModel::Predict(const std::string& raw_text, Result* result,
-                        const PredictionOption& option) {
-  // Preprocess
-  std::vector<FDTensor> input_tensor;
-  std::vector<FDTensor> output_tensor;
-  if (!preprocessor_->Encode(raw_text, &input_tensor)) {
-    FDERROR << "Failed to preprocess input data while using model:"
-            << ModelName() << "." << std::endl;
-    return false;
-  }
-
-  // Inference Runtime
-  if (!Infer(input_tensor, &output_tensor)) {
-    FDERROR << "Failed to inference while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-
-  // Postprocess
-  if (postprocessor_->Decode(output_tensor, result)) {
-    FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-bool TextModel::PredictBatch(const std::vector<std::string>& raw_text_array,
-                             Result* results, const PredictionOption& option) {
-  // Preprocess
-  std::vector<FDTensor> input_tensor;
-  std::vector<FDTensor> output_tensor;
-  if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) {
-    FDERROR << "Failed to preprocess input data while using model:"
-            << ModelName() << "." << std::endl;
-    return false;
-  }
-
-  // Inference Runtime
-  if (!Infer(input_tensor, &output_tensor)) {
-    FDERROR << "Failed to inference while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-
-  // Postprocess
-  if (postprocessor_->DecodeBatch(output_tensor, results)) {
-    FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-}  // namespace text
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/text/text_model.h b/csrcs/fastdeploy/text/text_model.h
deleted file mode 100644
index b7fbd5929..000000000
--- a/csrcs/fastdeploy/text/text_model.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <memory>
-
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/utils/unique_ptr.h"
-
-namespace fastdeploy {
-namespace text {
-
-class Preprocessor;
-class Postprocessor;
-class Result;
-class PredictionOption;
-
-class FASTDEPLOY_DECL TextModel : public FastDeployModel {
- public:
-  virtual std::string ModelName() const { return "TextModel"; }
-  virtual bool Predict(const std::string& raw_text, Result* result,
-                       const PredictionOption& option);
-  virtual bool PredictBatch(const std::vector<std::string>& raw_text_array,
-                            Result* result, const PredictionOption& option);
-  template <typename T, typename... Args>
-  void SetPreprocessor(Args&&... args) {
-    preprocessor_ = utils::make_unique<T>(std::forward<Args>(args)...);
-  }
-  template <typename T, typename... Args>
-  void SetPostprocessor(Args&&... args) {
-    postprocessor_ = utils::make_unique<T>(std::forward<Args>(args)...);
-  }
-
- private:
-  std::unique_ptr<Preprocessor> preprocessor_;
-  std::unique_ptr<Postprocessor> postprocessor_;
-};
-
-}  // namespace text
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/text/text_pybind.cc b/csrcs/fastdeploy/text/text_pybind.cc
deleted file mode 100644
index 564892f16..000000000
--- a/csrcs/fastdeploy/text/text_pybind.cc
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
\ No newline at end of file
diff --git a/csrcs/fastdeploy/utils/perf.h b/csrcs/fastdeploy/utils/perf.h
deleted file mode 100644
index 9f451c3a9..000000000
--- a/csrcs/fastdeploy/utils/perf.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/utils/utils.h"
-#include <chrono> // NOLINT
-
-namespace fastdeploy {
-
-class FASTDEPLOY_DECL TimeCounter {
- public:
-  void Start() { begin_ = std::chrono::system_clock::now(); }
-
-  void End() { end_ = std::chrono::system_clock::now(); }
-
-  double Duration() {
-    auto duration =
-        std::chrono::duration_cast<std::chrono::microseconds>(end_ - begin_);
-    return static_cast<double>(duration.count()) *
-           std::chrono::microseconds::period::num /
-           std::chrono::microseconds::period::den;
-  }
-
-  void PrintInfo(const std::string& prefix = "TimeCounter: ",
-                 bool print_out = true) {
-    if (!print_out) {
-      return;
-    }
-    FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl;
-  }
-
- private:
-  std::chrono::time_point<std::chrono::system_clock> begin_;
-  std::chrono::time_point<std::chrono::system_clock> end_;
-};
-
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/utils/unique_ptr.h b/csrcs/fastdeploy/utils/unique_ptr.h
deleted file mode 100644
index 2f24ef70c..000000000
--- a/csrcs/fastdeploy/utils/unique_ptr.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <memory>
-
-namespace fastdeploy {
-namespace utils {
-// Trait to select overloads and return types for MakeUnique.
-template <typename T>
-struct MakeUniqueResult {
-  using scalar = std::unique_ptr<T>;
-};
-template <typename T>
-struct MakeUniqueResult<T[]> {
-  using array = std::unique_ptr<T[]>;
-};
-template <typename T, size_t N>
-struct MakeUniqueResult<T[N]> {
-  using invalid = void;
-};
-
-// MakeUnique<T>(...) is an early implementation of C++14 std::make_unique.
-// It is designed to be 100% compatible with std::make_unique so that the
-// eventual switchover will be a simple renaming operation.
-template <typename T, typename... Args>
-typename MakeUniqueResult<T>::scalar make_unique(Args &&... args) {  // NOLINT
-  return std::unique_ptr<T>(
-      new T(std::forward<Args>(args)...));  // NOLINT(build/c++11)
-}
-
-// Overload for array of unknown bound.
-// The allocation of arrays needs to use the array form of new,
-// and cannot take element constructor arguments.
-template <typename T>
-typename MakeUniqueResult<T>::array make_unique(size_t n) {
-  return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
-}
-
-// Reject arrays of known bound.
-template <typename T, typename... Args>
-typename MakeUniqueResult<T>::invalid make_unique(Args &&... /* args */) =
-    delete;  // NOLINT
-
-}  // namespace utils
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/utils/utils.cc b/csrcs/fastdeploy/utils/utils.cc
deleted file mode 100644
index 3899bcf5e..000000000
--- a/csrcs/fastdeploy/utils/utils.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-
-FDLogger::FDLogger(bool verbose, const std::string& prefix) {
-  verbose_ = verbose;
-  line_ = "";
-  prefix_ = prefix;
-}
-
-FDLogger& FDLogger::operator<<(std::ostream& (*os)(std::ostream&)) {
-  if (!verbose_) {
-    return *this;
-  }
-  std::cout << prefix_ << " " << line_ << std::endl;
-  line_ = "";
-  return *this;
-}
-
-bool ReadBinaryFromFile(const std::string& file, std::string* contents) {
-  std::ifstream fin(file, std::ios::in | std::ios::binary);
-  if (!fin.is_open()) {
-    FDERROR << "Failed to open file: " << file << " to read." << std::endl;
-    return false;
-  }
-  fin.seekg(0, std::ios::end);
-  contents->clear();
-  contents->resize(fin.tellg());
-  fin.seekg(0, std::ios::beg);
-  fin.read(&(contents->at(0)), contents->size());
-  fin.close();
-  return true;
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/utils/utils.h b/csrcs/fastdeploy/utils/utils.h
deleted file mode 100644
index 3e309a12a..000000000
--- a/csrcs/fastdeploy/utils/utils.h
+++ /dev/null
@@ -1,150 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <stdlib.h>
-
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <string>
-
-#if defined(_WIN32)
-#ifdef FASTDEPLOY_LIB
-#define FASTDEPLOY_DECL __declspec(dllexport)
-#else
-#define FASTDEPLOY_DECL __declspec(dllimport)
-#endif  // FASTDEPLOY_LIB
-#else
-#define FASTDEPLOY_DECL __attribute__((visibility("default")))
-#endif  // _WIN32
-
-namespace fastdeploy {
-
-class FASTDEPLOY_DECL FDLogger {
- public:
-  FDLogger() {
-    line_ = "";
-    prefix_ = "[FastDeploy]";
-    verbose_ = true;
-  }
-  explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]");
-
-  template <typename T>
-  FDLogger& operator<<(const T& val) {
-    if (!verbose_) {
-      return *this;
-    }
-    std::stringstream ss;
-    ss << val;
-    line_ += ss.str();
-    return *this;
-  }
-  FDLogger& operator<<(std::ostream& (*os)(std::ostream&));
-  ~FDLogger() {
-    if (!verbose_ && line_ != "") {
-      std::cout << line_ << std::endl;
-    }
-  }
-
- private:
-  std::string line_;
-  std::string prefix_;
-  bool verbose_ = true;
-};
-
-FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
-                                        std::string* contents);
-
-#ifndef __REL_FILE__
-#define __REL_FILE__ __FILE__
-#endif
-
-#define FDERROR                                                \
-  FDLogger(true, "[ERROR]") << __REL_FILE__ << "(" << __LINE__ \
-                            << ")::" << __FUNCTION__ << "\t"
-
-#define FDWARNING                                                \
-  FDLogger(true, "[WARNING]") << __REL_FILE__ << "(" << __LINE__ \
-                              << ")::" << __FUNCTION__ << "\t"
-
-#define FDINFO                                                \
-  FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \
-                           << ")::" << __FUNCTION__ << "\t"
-
-#define FDASSERT(condition, message) \
-  if (!(condition)) {                \
-    FDERROR << message << std::endl; \
-    std::abort();                    \
-  }
-
-///////// Basic Marco ///////////
-
-#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \
-  case enum_type: {                                                       \
-    using HINT = type;                                                    \
-    __VA_ARGS__();                                                        \
-    break;                                                                \
-  }
-
-#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \
-  FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__)
-
-#define FD_VISIT_ALL_TYPES(TYPE, NAME, ...)                                \
-  [&] {                                                                    \
-    const auto& __dtype__ = TYPE;                                          \
-    switch (__dtype__) {                                                   \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::BOOL, bool,     \
-                           __VA_ARGS__)                                    \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
-                           __VA_ARGS__)                                    \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
-                           __VA_ARGS__)                                    \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float,    \
-                           __VA_ARGS__)                                    \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double,   \
-                           __VA_ARGS__)                                    \
-      default:                                                             \
-        FDASSERT(false, "Invalid enum data type.")                         \
-    }                                                                      \
-  }()
-
-#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...)                            \
-  [&] {                                                                  \
-    const auto& __dtype__ = TYPE;                                        \
-    switch (__dtype__) {                                                 \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float,  \
-                           __VA_ARGS__)                                  \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
-                           __VA_ARGS__)                                  \
-      default:                                                           \
-        FDASSERT(false, "Invalid enum data type.")                       \
-    }                                                                    \
-  }()
-
-#define FD_VISIT_INT_TYPES(TYPE, NAME, ...)                                \
-  [&] {                                                                    \
-    const auto& __dtype__ = TYPE;                                          \
-    switch (__dtype__) {                                                   \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
-                           __VA_ARGS__)                                    \
-      FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
-                           __VA_ARGS__)                                    \
-      default:                                                             \
-        FDASSERT(false, "Invalid enum data type.")                         \
-    }                                                                      \
-  }()
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision.h b/csrcs/fastdeploy/vision.h
deleted file mode 100644
index 21371b5a1..000000000
--- a/csrcs/fastdeploy/vision.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-
-#include "fastdeploy/core/config.h"
-#ifdef ENABLE_VISION
-#include "fastdeploy/vision/detection/contrib/nanodet_plus.h"
-#include "fastdeploy/vision/detection/contrib/scaledyolov4.h"
-#include "fastdeploy/vision/detection/contrib/yolor.h"
-#include "fastdeploy/vision/detection/contrib/yolov5.h"
-#include "fastdeploy/vision/detection/contrib/yolov5lite.h"
-#include "fastdeploy/vision/detection/contrib/yolov6.h"
-#include "fastdeploy/vision/detection/contrib/yolov7.h"
-#include "fastdeploy/vision/detection/contrib/yolox.h"
-#include "fastdeploy/vision/facedet/contrib/retinaface.h"
-#include "fastdeploy/vision/facedet/contrib/scrfd.h"
-#include "fastdeploy/vision/facedet/contrib/ultraface.h"
-#include "fastdeploy/vision/facedet/contrib/yolov5face.h"
-#include "fastdeploy/vision/faceid/contrib/arcface.h"
-#include "fastdeploy/vision/faceid/contrib/cosface.h"
-#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
-#include "fastdeploy/vision/faceid/contrib/partial_fc.h"
-#include "fastdeploy/vision/faceid/contrib/vpl.h"
-#include "fastdeploy/vision/matting/contrib/modnet.h"
-#include "fastdeploy/vision/ppcls/model.h"
-#include "fastdeploy/vision/detection/ppdet/model.h"
-#include "fastdeploy/vision/ppseg/model.h"
-#endif
-
-#include "fastdeploy/vision/visualize/visualize.h"
diff --git a/csrcs/fastdeploy/vision/AddModel.md b/csrcs/fastdeploy/vision/AddModel.md
deleted file mode 100644
index 30080bd5e..000000000
--- a/csrcs/fastdeploy/vision/AddModel.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# 如何添加一个模型
-
-本文档以[yolov5](https://github.com/ultralytics/yolov5)为例，说明如何添加新的模型支持。
diff --git a/csrcs/fastdeploy/vision/common/processors/base.cc b/csrcs/fastdeploy/vision/common/processors/base.cc
deleted file mode 100644
index d770522d8..000000000
--- a/csrcs/fastdeploy/vision/common/processors/base.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/base.h"
-#include "fastdeploy/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-
-ProcLib Processor::default_lib = ProcLib::DEFAULT;
-
-bool Processor::CpuRun(Mat* mat) {
-  FDERROR << "Unimplemented CpuRun." << std::endl;
-  return false;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool Processor::GpuRun(Mat* mat) {
-  FDERROR << "Unimplemented GpuRun." << std::endl;
-  return false;
-}
-#endif
-
-bool Processor::operator()(Mat* mat, ProcLib lib) {
-  // if default_lib is set
-  // then use default_lib
-  ProcLib target = lib;
-  if (default_lib != ProcLib::DEFAULT) {
-    target = default_lib;
-  }
-
-  if (target == ProcLib::OPENCV_CUDA) {
-#ifdef ENABLE_OPENCV_CUDA
-    bool ret = GpuRun(mat);
-    mat->device = Device::GPU;
-    return ret;
-#else
-    FDERROR
-        << "OpenCV is not compiled with CUDA, cannot process image with CUDA."
-        << std::endl;
-    return false;
-#endif
-  }
-  bool ret = CpuRun(mat);
-  mat->device = Device::CPU;
-  return ret;
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/base.h b/csrcs/fastdeploy/vision/common/processors/base.h
deleted file mode 100644
index d4138864a..000000000
--- a/csrcs/fastdeploy/vision/common/processors/base.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/utils/utils.h"
-#include "fastdeploy/vision/common/processors/mat.h"
-#include "opencv2/highgui/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-
-namespace fastdeploy {
-namespace vision {
-
-enum ProcLib { DEFAULT, OPENCV_CPU, OPENCV_CUDA };
-
-class Processor {
- public:
-  // default_lib has the highest priority
-  // all the function in `processor` will force to use
-  // default_lib if this flag is set.
-  // DEFAULT means this flag is not set
-  static ProcLib default_lib;
-
-  //  virtual bool ShapeInfer(const std::vector<int>& in_shape,
-  //                          std::vector<int>* out_shape) = 0;
-  virtual std::string Name() = 0;
-  virtual bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  virtual bool GpuRun(Mat* mat);
-#endif
-
-  virtual bool operator()(Mat* mat,
-                          ProcLib lib = ProcLib::OPENCV_CPU);
-};
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/cast.cc b/csrcs/fastdeploy/vision/common/processors/cast.cc
deleted file mode 100644
index b9a757f14..000000000
--- a/csrcs/fastdeploy/vision/common/processors/cast.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/cast.h"
-
-namespace fastdeploy {
-namespace vision {
-
-bool Cast::CpuRun(Mat* mat) {
-  cv::Mat* im = mat->GetCpuMat();
-  int c = im->channels();
-  if (dtype_ == "float") {
-    if (im->type() != CV_32FC(c)) {
-      im->convertTo(*im, CV_32FC(c));
-    }
-  } else if (dtype_ == "double") {
-    if (im->type() != CV_64FC(c)) {
-      im->convertTo(*im, CV_64FC(c));
-    }
-  } else {
-    FDWARNING << "Cast not support for " << dtype_
-              << " now! will skip this operation." << std::endl;
-  }
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool Cast::GpuRun(Mat* mat) {
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  int c = im->channels();
-  if (dtype_ == "float") {
-    if (im->type() != CV_32FC(c)) {
-      im->convertTo(*im, CV_32FC(c));
-    }
-  } else if (dtype_ == "double") {
-    if (im->type() != CV_64FC(c)) {
-      im->convertTo(*im, CV_64FC(c));
-    }
-  } else {
-    FDWARNING << "Cast not support for " << dtype_
-              << " now! will skip this operation." << std::endl;
-  }
-  return true;
-}
-#endif
-
-bool Cast::Run(Mat* mat, const std::string& dtype, ProcLib lib) {
-  auto c = Cast(dtype);
-  return c(mat, lib);
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/cast.h b/csrcs/fastdeploy/vision/common/processors/cast.h
deleted file mode 100644
index 1111f08a6..000000000
--- a/csrcs/fastdeploy/vision/common/processors/cast.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class Cast : public Processor {
- public:
-  explicit Cast(const std::string& dtype = "float") : dtype_(dtype) {}
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "Cast"; }
-  static bool Run(Mat* mat, const std::string& dtype,
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
- private:
-  std::string dtype_;
-};
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/center_crop.cc b/csrcs/fastdeploy/vision/common/processors/center_crop.cc
deleted file mode 100644
index 27b86ca2d..000000000
--- a/csrcs/fastdeploy/vision/common/processors/center_crop.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/center_crop.h"
-
-namespace fastdeploy {
-namespace vision {
-
-bool CenterCrop::CpuRun(Mat* mat) {
-  cv::Mat* im = mat->GetCpuMat();
-  int height = static_cast<int>(im->rows);
-  int width = static_cast<int>(im->cols);
-  if (height < height_ || width < width_) {
-    FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
-    return false;
-  }
-  int offset_x = static_cast<int>((width - width_) / 2);
-  int offset_y = static_cast<int>((height - height_) / 2);
-  cv::Rect crop_roi(offset_x, offset_y, width_, height_);
-  *im = (*im)(crop_roi);
-  mat->SetWidth(width_);
-  mat->SetHeight(height_);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool CenterCrop::GpuRun(Mat* mat) {
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  int height = static_cast<int>(im->rows);
-  int width = static_cast<int>(im->cols);
-  if (height < height_ || width < width_) {
-    FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
-    return false;
-  }
-  int offset_x = static_cast<int>((width - width_) / 2);
-  int offset_y = static_cast<int>((height - height_) / 2);
-  cv::Rect crop_roi(offset_x, offset_y, width_, height_);
-  *im = (*im)(crop_roi);
-  mat->SetWidth(width_);
-  mat->SetHeight(height_);
-  return true;
-}
-#endif
-
-bool CenterCrop::Run(Mat* mat, const int& width, const int& height,
-                     ProcLib lib) {
-  auto c = CenterCrop(width, height);
-  return c(mat, lib);
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/center_crop.h b/csrcs/fastdeploy/vision/common/processors/center_crop.h
deleted file mode 100644
index 86ad0e20d..000000000
--- a/csrcs/fastdeploy/vision/common/processors/center_crop.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class CenterCrop : public Processor {
- public:
-  CenterCrop(int width, int height) : height_(height), width_(width) {}
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "CenterCrop"; }
-
-  static bool Run(Mat* mat, const int& width, const int& height,
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
- private:
-  int height_;
-  int width_;
-};
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/color_space_convert.cc b/csrcs/fastdeploy/vision/common/processors/color_space_convert.cc
deleted file mode 100644
index bcdaf365a..000000000
--- a/csrcs/fastdeploy/vision/common/processors/color_space_convert.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/color_space_convert.h"
-
-namespace fastdeploy {
-namespace vision {
-bool BGR2RGB::CpuRun(Mat* mat) {
-  cv::Mat* im = mat->GetCpuMat();
-  cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool BGR2RGB::GpuRun(Mat* mat) {
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  cv::cuda::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
-  return true;
-}
-#endif
-
-bool RGB2BGR::CpuRun(Mat* mat) {
-  cv::Mat* im = mat->GetCpuMat();
-  cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool RGB2BGR::GpuRun(Mat* mat) {
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  cv::cuda::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
-  return true;
-}
-#endif
-
-bool BGR2RGB::Run(Mat* mat, ProcLib lib) {
-  auto b = BGR2RGB();
-  return b(mat, lib);
-}
-
-bool RGB2BGR::Run(Mat* mat, ProcLib lib) {
-  auto r = RGB2BGR();
-  return r(mat, lib);
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/color_space_convert.h b/csrcs/fastdeploy/vision/common/processors/color_space_convert.h
deleted file mode 100644
index 472bcf16d..000000000
--- a/csrcs/fastdeploy/vision/common/processors/color_space_convert.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class BGR2RGB : public Processor {
- public:
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  virtual std::string Name() { return "BGR2RGB"; }
-
-  static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
-};
-
-class RGB2BGR : public Processor {
- public:
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "RGB2BGR"; }
-
-  static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
-};
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/convert.cc b/csrcs/fastdeploy/vision/common/processors/convert.cc
deleted file mode 100644
index a7ca6de07..000000000
--- a/csrcs/fastdeploy/vision/common/processors/convert.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/convert.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-Convert::Convert(const std::vector<float>& alpha,
-                 const std::vector<float>& beta) {
-  FDASSERT(alpha.size() == beta.size(),
-           "Convert: requires the size of alpha equal to the size of beta.");
-  FDASSERT(alpha.size() != 0,
-           "Convert: requires the size of alpha and beta > 0.");
-  alpha_.assign(alpha.begin(), alpha.end());
-  beta_.assign(beta.begin(), beta.end());
-}
-
-bool Convert::CpuRun(Mat* mat) {
-  cv::Mat* im = mat->GetCpuMat();
-  std::vector<cv::Mat> split_im;
-  cv::split(*im, split_im);
-  for (int c = 0; c < im->channels(); c++) {
-    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
-  }
-  cv::merge(split_im, *im);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool Convert::GpuRun(Mat* mat) {
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  std::vector<cv::cuda::GpuMat> split_im;
-  cv::cuda::split(*im, split_im);
-  for (int c = 0; c < im->channels(); c++) {
-    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
-  }
-  cv::cuda::merge(split_im, *im);
-  return true;
-}
-#endif
-
-bool Convert::Run(Mat* mat, const std::vector<float>& alpha,
-                  const std::vector<float>& beta, ProcLib lib) {
-  auto c = Convert(alpha, beta);
-  return c(mat, lib);
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/common/processors/convert.h b/csrcs/fastdeploy/vision/common/processors/convert.h
deleted file mode 100644
index 5d5a5276f..000000000
--- a/csrcs/fastdeploy/vision/common/processors/convert.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-class Convert : public Processor {
- public:
-  Convert(const std::vector<float>& alpha, const std::vector<float>& beta);
-
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "Convert"; }
-
-  // Compute `result = mat * alpha + beta` directly by channel.
-  // The default behavior is the same as OpenCV's convertTo method.
-  static bool Run(Mat* mat, const std::vector<float>& alpha,
-                  const std::vector<float>& beta,
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
- private:
-  std::vector<float> alpha_;
-  std::vector<float> beta_;
-};
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/hwc2chw.cc b/csrcs/fastdeploy/vision/common/processors/hwc2chw.cc
deleted file mode 100644
index 5bea87e18..000000000
--- a/csrcs/fastdeploy/vision/common/processors/hwc2chw.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/hwc2chw.h"
-
-namespace fastdeploy {
-namespace vision {
-bool HWC2CHW::CpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "HWC2CHW: The input data is not Layout::HWC format!"
-            << std::endl;
-    return false;
-  }
-  cv::Mat* im = mat->GetCpuMat();
-  cv::Mat im_clone = im->clone();
-  int rh = im->rows;
-  int rw = im->cols;
-  int rc = im->channels();
-
-  //  float* data = reinterpret_cast<float*>(im->data);
-  for (int i = 0; i < rc; ++i) {
-    //    cv::extractChannel(im_clone, cv::Mat(rh, rw, im->type() % 8, data + i
-    //    * rh * rw),
-    //                       i);
-    cv::extractChannel(
-        im_clone,
-        cv::Mat(rh, rw, im->type() % 8,
-                im->ptr() + i * rh * rw * FDDataTypeSize(mat->Type())),
-        i);
-  }
-  mat->layout = Layout::CHW;
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool HWC2CHW::GpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "HWC2CHW: The input data is not Layout::HWC format!"
-            << std::endl;
-    return false;
-  }
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  cv::cuda::GpuMat im_clone = im->clone();
-  int rh = im->rows;
-  int rw = im->cols;
-  int rc = im->channels();
-  int num_pixels = rh * rw;
-  std::vector<cv::cuda::GpuMat> channels{
-      cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[0])),
-      cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels])),
-      cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels * 2]))};
-  cv::cuda::split(im_clone, channels);
-  mat->layout = Layout::CHW;
-  return true;
-}
-#endif
-
-bool HWC2CHW::Run(Mat* mat, ProcLib lib) {
-  auto h = HWC2CHW();
-  return h(mat, lib);
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/hwc2chw.h b/csrcs/fastdeploy/vision/common/processors/hwc2chw.h
deleted file mode 100644
index 56fa3ede8..000000000
--- a/csrcs/fastdeploy/vision/common/processors/hwc2chw.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class HWC2CHW : public Processor {
- public:
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "HWC2CHW"; }
-
-  static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
-};
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/mat.cc b/csrcs/fastdeploy/vision/common/processors/mat.cc
deleted file mode 100644
index 2afffa416..000000000
--- a/csrcs/fastdeploy/vision/common/processors/mat.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "fastdeploy/vision/common/processors/mat.h"
-#include "fastdeploy/utils/utils.h"
-namespace fastdeploy {
-namespace vision {
-
-#ifdef ENABLE_OPENCV_CUDA
-cv::cuda::GpuMat* Mat::GetGpuMat() {
-  if (device == Device::CPU) {
-    gpu_mat.upload(cpu_mat);
-  }
-  return &gpu_mat;
-}
-#endif
-
-cv::Mat* Mat::GetCpuMat() {
-#ifdef ENABLE_OPENCV_CUDA
-  if (device == Device::GPU) {
-    gpu_mat.download(cpu_mat);
-  }
-#endif
-  return &cpu_mat;
-}
-
-void Mat::ShareWithTensor(FDTensor* tensor) {
-  if (device == Device::GPU) {
-#ifdef ENABLE_OPENCV_CUDA
-    tensor->SetExternalData({Channels(), Height(), Width()}, Type(),
-                            GetGpuMat()->ptr());
-    tensor->device = Device::GPU;
-#endif
-  } else {
-    tensor->SetExternalData({Channels(), Height(), Width()}, Type(),
-                            GetCpuMat()->ptr());
-    tensor->device = Device::CPU;
-  }
-  if (layout == Layout::HWC) {
-    tensor->shape = {Height(), Width(), Channels()};
-  }
-}
-
-bool Mat::CopyToTensor(FDTensor* tensor) {
-  cv::Mat* im = GetCpuMat();
-  int total_bytes = im->total() * im->elemSize();
-  if (total_bytes != tensor->Nbytes()) {
-    FDERROR << "While copy Mat to Tensor, requires the memory size be same, "
-               "but now size of Tensor = "
-            << tensor->Nbytes() << ", size of Mat = " << total_bytes << "."
-            << std::endl;
-    return false;
-  }
-  memcpy(tensor->MutableData(), im->ptr(), im->total() * im->elemSize());
-  return true;
-}
-
-void Mat::PrintInfo(const std::string& flag) {
-  cv::Mat* im = GetCpuMat();
-  cv::Scalar mean = cv::mean(*im);
-  std::cout << flag << ": "
-            << "Channel=" << Channels() << ", height=" << Height()
-            << ", width=" << Width() << ", mean=";
-  for (int i = 0; i < Channels(); ++i) {
-    std::cout << mean[i] << " ";
-  }
-  std::cout << std::endl;
-}
-
-FDDataType Mat::Type() {
-  int type = -1;
-  if (device == Device::GPU) {
-#ifdef ENABLE_OPENCV_CUDA
-    type = gpu_mat.type();
-#endif
-  } else {
-    type = cpu_mat.type();
-  }
-  if (type < 0) {
-    FDASSERT(false,
-             "While calling Mat::Type(), get negative value, which is not "
-             "expected!.");
-  }
-  type = type % 8;
-  if (type == 0) {
-    return FDDataType::UINT8;
-  } else if (type == 1) {
-    return FDDataType::INT8;
-  } else if (type == 2) {
-    FDASSERT(false, "While calling Mat::Type(), get UINT16 type which is not "
-                    "supported now.");
-  } else if (type == 3) {
-    return FDDataType::INT16;
-  } else if (type == 4) {
-    return FDDataType::INT32;
-  } else if (type == 5) {
-    return FDDataType::FP32;
-  } else if (type == 6) {
-    return FDDataType::FP64;
-  } else {
-    FDASSERT(false, "While calling Mat::Type(), get type = " +
-                        std::to_string(type) + ", which is not expected!.");
-  }
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/mat.h b/csrcs/fastdeploy/vision/common/processors/mat.h
deleted file mode 100644
index cf4736238..000000000
--- a/csrcs/fastdeploy/vision/common/processors/mat.h
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include "fastdeploy/core/fd_tensor.h"
-#include "opencv2/core/core.hpp"
-
-#ifdef ENABLE_OPENCV_CUDA
-#include "opencv2/core/cuda.hpp"
-#include "opencv2/cudaarithm.hpp"
-#include "opencv2/cudaimgproc.hpp"
-#include "opencv2/cudawarping.hpp"
-#endif
-
-namespace fastdeploy {
-namespace vision {
-
-enum Layout { HWC, CHW };
-
-struct FASTDEPLOY_DECL Mat {
-  explicit Mat(cv::Mat& mat) {
-    cpu_mat = mat;
-    device = Device::CPU;
-    layout = Layout::HWC;
-    height = cpu_mat.rows;
-    width = cpu_mat.cols;
-    channels = cpu_mat.channels();
-  }
-
- private:
-  int channels;
-  int height;
-  int width;
-  cv::Mat cpu_mat;
-#ifdef ENABLE_OPENCV_CUDA
-  cv::cuda::GpuMat gpu_mat;
-#endif
-
- public:
-#ifdef ENABLE_OPENCV_CUDA
-  cv::cuda::GpuMat* GetGpuMat();
-#endif
-  cv::Mat* GetCpuMat();
-
-  FDDataType Type();
-  int Channels() const { return channels; }
-  int Width() const { return width; }
-  int Height() const { return height; }
-  void SetChannels(int s) { channels = s; }
-  void SetWidth(int w) { width = w; }
-  void SetHeight(int h) { height = h; }
-
-  // Transfer the vision::Mat to FDTensor
-  void ShareWithTensor(FDTensor* tensor);
-  // Only support copy to cpu tensor now
-  bool CopyToTensor(FDTensor* tensor);
-
-  // debug functions
-  // TODO(jiangjiajun) Develop a right process pipeline with c++ is not a easy
-  // things
-  // Will add more debug function here to help debug processed image
-  // This function will print shape / mean of each channels of the Mat
-  void PrintInfo(const std::string& flag);
-
-  Layout layout = Layout::HWC;
-  Device device = Device::CPU;
-};
-
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/normalize.cc b/csrcs/fastdeploy/vision/common/processors/normalize.cc
deleted file mode 100644
index b75406070..000000000
--- a/csrcs/fastdeploy/vision/common/processors/normalize.cc
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/normalize.h"
-
-namespace fastdeploy {
-namespace vision {
-Normalize::Normalize(const std::vector<float>& mean,
-                     const std::vector<float>& std, bool is_scale,
-                     const std::vector<float>& min,
-                     const std::vector<float>& max) {
-  FDASSERT(mean.size() == std.size(),
-           "Normalize: requires the size of mean equal to the size of std.");
-  std::vector<double> mean_(mean.begin(), mean.end());
-  std::vector<double> std_(std.begin(), std.end());
-  std::vector<double> min_(mean.size(), 0.0);
-  std::vector<double> max_(mean.size(), 255.0);
-  if (min.size() != 0) {
-    FDASSERT(
-        min.size() == mean.size(),
-        "Normalize: while min is defined, requires the size of min equal to "
-        "the size of mean.");
-    min_.assign(min.begin(), min.end());
-  }
-  if (max.size() != 0) {
-    FDASSERT(
-        min.size() == mean.size(),
-        "Normalize: while max is defined, requires the size of max equal to "
-        "the size of mean.");
-    max_.assign(max.begin(), max.end());
-  }
-  for (auto c = 0; c < mean_.size(); ++c) {
-    double alpha = 1.0;
-    if (is_scale) {
-      alpha /= (max_[c] - min_[c]);
-    }
-    double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c];
-    alpha /= std_[c];
-    alpha_.push_back(alpha);
-    beta_.push_back(beta);
-  }
-}
-
-bool Normalize::CpuRun(Mat* mat) {
-  cv::Mat* im = mat->GetCpuMat();
-  std::vector<cv::Mat> split_im;
-  cv::split(*im, split_im);
-  for (int c = 0; c < im->channels(); c++) {
-    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
-  }
-  cv::merge(split_im, *im);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool Normalize::GpuRun(Mat* mat) {
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  std::vector<cv::cuda::GpuMat> split_im;
-  cv::cuda::split(*im, split_im);
-  for (int c = 0; c < im->channels(); c++) {
-    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
-  }
-  cv::cuda::merge(split_im, *im);
-  return true;
-}
-#endif
-
-bool Normalize::Run(Mat* mat, const std::vector<float>& mean,
-                    const std::vector<float>& std, bool is_scale,
-                    const std::vector<float>& min,
-                    const std::vector<float>& max, ProcLib lib) {
-  auto n = Normalize(mean, std, is_scale, min, max);
-  return n(mat, lib);
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/normalize.h b/csrcs/fastdeploy/vision/common/processors/normalize.h
deleted file mode 100644
index b8a66e945..000000000
--- a/csrcs/fastdeploy/vision/common/processors/normalize.h
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-class Normalize : public Processor {
- public:
-  Normalize(const std::vector<float>& mean, const std::vector<float>& std,
-            bool is_scale = true,
-            const std::vector<float>& min = std::vector<float>(),
-            const std::vector<float>& max = std::vector<float>());
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "Normalize"; }
-
-  // While use normalize, it is more recommend not use this function
-  // this function will need to compute result = ((mat / 255) - mean) / std
-  // if we use the following method
-  // ```
-  // auto norm = Normalize(...)
-  // norm(mat)
-  // ```
-  // There will be some precomputation in contruct function
-  // and the `norm(mat)` only need to compute result = mat * alpha + beta
-  // which will reduce lots of time
-  static bool Run(Mat* mat, const std::vector<float>& mean,
-                  const std::vector<float>& std, bool is_scale = true,
-                  const std::vector<float>& min = std::vector<float>(),
-                  const std::vector<float>& max = std::vector<float>(),
-                  ProcLib lib = ProcLib::OPENCV_CPU);
- private:
-  std::vector<float> alpha_;
-  std::vector<float> beta_;
-};
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/pad.cc b/csrcs/fastdeploy/vision/common/processors/pad.cc
deleted file mode 100644
index 3b26d28bc..000000000
--- a/csrcs/fastdeploy/vision/common/processors/pad.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/pad.h"
-
-namespace fastdeploy {
-namespace vision {
-
-bool Pad::CpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl;
-    return false;
-  }
-  if (mat->Channels() > 4) {
-    FDERROR << "Pad: Only support channels <= 4." << std::endl;
-    return false;
-  }
-  if (mat->Channels() != value_.size()) {
-    FDERROR << "Pad: Require input channels equals to size of padding value, "
-               "but now channels = "
-            << mat->Channels()
-            << ", the size of padding values = " << value_.size() << "."
-            << std::endl;
-    return false;
-  }
-  cv::Mat* im = mat->GetCpuMat();
-  cv::Scalar value;
-  if (value_.size() == 1) {
-    value = cv::Scalar(value_[0]);
-  } else if (value_.size() == 2) {
-    value = cv::Scalar(value_[0], value_[1]);
-  } else if (value_.size() == 3) {
-    value = cv::Scalar(value_[0], value_[1], value_[2]);
-  } else {
-    value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
-  }
-  cv::copyMakeBorder(*im, *im, top_, bottom_, left_, right_,
-                     cv::BORDER_CONSTANT, value);
-  mat->SetHeight(im->rows);
-  mat->SetWidth(im->cols);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool Pad::GpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl;
-    return false;
-  }
-  if (mat->Channels() > 4) {
-    FDERROR << "Pad: Only support channels <= 4." << std::endl;
-    return false;
-  }
-  if (mat->Channels() != value_.size()) {
-    FDERROR << "Pad: Require input channels equals to size of padding value, "
-               "but now channels = "
-            << mat->Channels()
-            << ", the size of padding values = " << value_.size() << "."
-            << std::endl;
-    return false;
-  }
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  cv::Scalar value;
-  if (value_.size() == 1) {
-    value = cv::Scalar(value_[0]);
-  } else if (value_.size() == 2) {
-    value = cv::Scalar(value_[0], value_[1]);
-  } else if (value_.size() == 3) {
-    value = cv::Scalar(value_[0], value_[1], value_[2]);
-  } else {
-    value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
-  }
-  cv::cuda::copyMakeBorder(*im, *im, top_, bottom_, left_, right_,
-                           cv::BORDER_CONSTANT, value);
-  mat->SetHeight(im->rows);
-  mat->SetWidth(im->cols);
-  return true;
-}
-#endif
-
-bool Pad::Run(Mat* mat, const int& top, const int& bottom, const int& left,
-              const int& right, const std::vector<float>& value,
-              ProcLib lib) {
-  auto p = Pad(top, bottom, left, right, value);
-  return p(mat, lib);
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/pad.h b/csrcs/fastdeploy/vision/common/processors/pad.h
deleted file mode 100644
index 110365960..000000000
--- a/csrcs/fastdeploy/vision/common/processors/pad.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class Pad : public Processor {
- public:
-  Pad(int top, int bottom, int left, int right,
-      const std::vector<float>& value) {
-    top_ = top;
-    bottom_ = bottom;
-    left_ = left;
-    right_ = right;
-    value_ = value;
-  }
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "Pad"; }
-
-  static bool Run(Mat* mat, const int& top, const int& bottom, const int& left,
-                  const int& right, const std::vector<float>& value,
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
- private:
-  int top_;
-  int bottom_;
-  int left_;
-  int right_;
-  std::vector<float> value_;
-};
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/pad_to_size.cc b/csrcs/fastdeploy/vision/common/processors/pad_to_size.cc
deleted file mode 100644
index d4cbacd87..000000000
--- a/csrcs/fastdeploy/vision/common/processors/pad_to_size.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/pad_to_size.h"
-
-namespace fastdeploy {
-namespace vision {
-
-bool PadToSize::CpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "PadToSize: The input data must be Layout::HWC format!"
-            << std::endl;
-    return false;
-  }
-  if (mat->Channels() > 4) {
-    FDERROR << "PadToSize: Only support channels <= 4." << std::endl;
-    return false;
-  }
-  if (mat->Channels() != value_.size()) {
-    FDERROR
-        << "PadToSize: Require input channels equals to size of padding value, "
-           "but now channels = "
-        << mat->Channels() << ", the size of padding values = " << value_.size()
-        << "." << std::endl;
-    return false;
-  }
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  if (origin_w > width_) {
-    FDERROR << "PadToSize: the input width:" << origin_w
-            << " is greater than the target width: " << width_ << "."
-            << std::endl;
-    return false;
-  }
-  if (origin_h > height_) {
-    FDERROR << "PadToSize: the input height:" << origin_h
-            << " is greater than the target height: " << height_ << "."
-            << std::endl;
-    return false;
-  }
-  if (origin_w == width_ && origin_h == height_) {
-    return true;
-  }
-
-  cv::Mat* im = mat->GetCpuMat();
-  cv::Scalar value;
-  if (value_.size() == 1) {
-    value = cv::Scalar(value_[0]);
-  } else if (value_.size() == 2) {
-    value = cv::Scalar(value_[0], value_[1]);
-  } else if (value_.size() == 3) {
-    value = cv::Scalar(value_[0], value_[1], value_[2]);
-  } else {
-    value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
-  }
-  // top, bottom, left, right
-  cv::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0, width_ - origin_w,
-                     cv::BORDER_CONSTANT, value);
-  mat->SetHeight(height_);
-  mat->SetWidth(width_);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool PadToSize::GpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "PadToSize: The input data must be Layout::HWC format!"
-            << std::endl;
-    return false;
-  }
-  if (mat->Channels() > 4) {
-    FDERROR << "PadToSize: Only support channels <= 4." << std::endl;
-    return false;
-  }
-  if (mat->Channels() != value_.size()) {
-    FDERROR
-        << "PadToSize: Require input channels equals to size of padding value, "
-           "but now channels = "
-        << mat->Channels() << ", the size of padding values = " << value_.size()
-        << "." << std::endl;
-    return false;
-  }
-
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  if (origin_w > width_) {
-    FDERROR << "PadToSize: the input width:" << origin_w
-            << " is greater than the target width: " << width_ << "."
-            << std::endl;
-    return false;
-  }
-  if (origin_h > height_) {
-    FDERROR << "PadToSize: the input height:" << origin_h
-            << " is greater than the target height: " << height_ << "."
-            << std::endl;
-    return false;
-  }
-  if (origin_w == width_ && origin_h == height_) {
-    return true;
-  }
-
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  cv::Scalar value;
-  if (value_.size() == 1) {
-    value = cv::Scalar(value_[0]);
-  } else if (value_.size() == 2) {
-    value = cv::Scalar(value_[0], value_[1]);
-  } else if (value_.size() == 3) {
-    value = cv::Scalar(value_[0], value_[1], value_[2]);
-  } else {
-    value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
-  }
-
-  // top, bottom, left, right
-  cv::cuda::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0,
-                           width_ - origin_w, cv::BORDER_CONSTANT, value);
-  mat->SetHeight(height_);
-  mat->SetWidth(width_);
-  return true;
-}
-#endif
-
-bool PadToSize::Run(Mat* mat, int width, int height,
-                    const std::vector<float>& value, ProcLib lib) {
-  auto p = PadToSize(width, height, value);
-  return p(mat, lib);
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/pad_to_size.h b/csrcs/fastdeploy/vision/common/processors/pad_to_size.h
deleted file mode 100644
index ece0158f7..000000000
--- a/csrcs/fastdeploy/vision/common/processors/pad_to_size.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class PadToSize : public Processor {
- public:
-  // only support pad with left-top padding mode
-  PadToSize(int width, int height, const std::vector<float>& value) {
-    width_ = width;
-    height_ = height;
-    value_ = value;
-  }
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "PadToSize"; }
-
-  static bool Run(Mat* mat, int width, int height,
-                  const std::vector<float>& value,
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
- private:
-  int width_;
-  int height_;
-  std::vector<float> value_;
-};
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/resize.cc b/csrcs/fastdeploy/vision/common/processors/resize.cc
deleted file mode 100644
index d6b8b9e2f..000000000
--- a/csrcs/fastdeploy/vision/common/processors/resize.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/resize.h"
-
-namespace fastdeploy {
-namespace vision {
-
-bool Resize::CpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "Resize: The format of input is not HWC." << std::endl;
-    return false;
-  }
-  cv::Mat* im = mat->GetCpuMat();
-  int origin_w = im->cols;
-  int origin_h = im->rows;
-  if (width_ > 0 && height_ > 0) {
-    if (use_scale_) {
-      float scale_w = width_ * 1.0 / origin_w;
-      float scale_h = height_ * 1.0 / origin_h;
-      cv::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_);
-    } else {
-      cv::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_);
-    }
-  } else if (scale_w_ > 0 && scale_h_ > 0) {
-    cv::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_);
-  } else {
-    FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) "
-               "or (scale_w > 0 && scale_h > 0)."
-            << std::endl;
-    return false;
-  }
-  mat->SetWidth(im->cols);
-  mat->SetHeight(im->rows);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool Resize::GpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "Resize: The format of input is not HWC." << std::endl;
-    return false;
-  }
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  int origin_w = im->cols;
-  int origin_h = im->rows;
-  if (width_ > 0 && height_ > 0) {
-    if (use_scale_) {
-      float scale_w = width_ * 1.0 / origin_w;
-      float scale_h = height_ * 1.0 / origin_h;
-      cv::cuda::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_);
-    } else {
-      cv::cuda::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_);
-    }
-  } else if (scale_w_ > 0 && scale_h_ > 0) {
-    cv::cuda::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_);
-  } else {
-    FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) "
-               "or (scale_w > 0 && scale_h > 0)."
-            << std::endl;
-    return false;
-  }
-  mat->SetWidth(im->cols);
-  mat->SetHeight(im->rows);
-  return true;
-}
-#endif
-
-bool Resize::Run(Mat* mat, int width, int height, float scale_w, float scale_h,
-                 int interp, bool use_scale, ProcLib lib) {
-  if (mat->Height() == height && mat->Width() == width) {
-    return true;
-  }
-  auto r = Resize(width, height, scale_w, scale_h, interp, use_scale);
-  return r(mat, lib);
-}
-
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/resize.h b/csrcs/fastdeploy/vision/common/processors/resize.h
deleted file mode 100644
index 5b6e9c025..000000000
--- a/csrcs/fastdeploy/vision/common/processors/resize.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class Resize : public Processor {
- public:
-  Resize(int width, int height, float scale_w = -1.0, float scale_h = -1.0,
-         int interp = 1, bool use_scale = false) {
-    width_ = width;
-    height_ = height;
-    scale_w_ = scale_w;
-    scale_h_ = scale_h;
-    interp_ = interp;
-    use_scale_ = use_scale;
-  }
-
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "Resize"; }
-
-  static bool Run(Mat* mat, int width, int height, float scale_w = -1.0,
-                  float scale_h = -1.0, int interp = 1, bool use_scale = false,
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
-  bool SetWidthAndHeight(int width, int height) {
-    width_ = width;
-    height_ = height;
-    return true;
-  }
-
-  std::tuple<int, int> GetWidthAndHeight() {
-    return std::make_tuple(width_, height_);
-  }
-
- private:
-  int width_;
-  int height_;
-  float scale_w_ = -1.0;
-  float scale_h_ = -1.0;
-  int interp_ = 1;
-  bool use_scale_ = false;
-};
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/resize_by_short.cc b/csrcs/fastdeploy/vision/common/processors/resize_by_short.cc
deleted file mode 100644
index 8e850425f..000000000
--- a/csrcs/fastdeploy/vision/common/processors/resize_by_short.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/resize_by_short.h"
-
-namespace fastdeploy {
-namespace vision {
-
-bool ResizeByShort::CpuRun(Mat* mat) {
-  cv::Mat* im = mat->GetCpuMat();
-  int origin_w = im->cols;
-  int origin_h = im->rows;
-  double scale = GenerateScale(origin_w, origin_h);
-  if (use_scale_) {
-    cv::resize(*im, *im, cv::Size(), scale, scale, interp_);
-  } else {
-    int width = static_cast<int>(round(scale * im->cols));
-    int height = static_cast<int>(round(scale * im->rows));
-    cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
-  }
-  mat->SetWidth(im->cols);
-  mat->SetHeight(im->rows);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool ResizeByShort::GpuRun(Mat* mat) {
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  int origin_w = im->cols;
-  int origin_h = im->rows;
-  double scale = GenerateScale(origin_w, origin_h);
-  im->convertTo(*im, CV_32FC(im->channels()));
-  if (use_scale_) {
-    cv::cuda::resize(*im, *im, cv::Size(), scale, scale, interp_);
-  } else {
-    int width = static_cast<int>(round(scale * im->cols));
-    int height = static_cast<int>(round(scale * im->rows));
-    cv::cuda::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
-  }
-  mat->SetWidth(im->cols);
-  mat->SetHeight(im->rows);
-  return true;
-}
-#endif
-
-double ResizeByShort::GenerateScale(const int origin_w, const int origin_h) {
-  int im_size_max = std::max(origin_w, origin_h);
-  int im_size_min = std::min(origin_w, origin_h);
-  double scale =
-      static_cast<double>(target_size_) / static_cast<double>(im_size_min);
-  if (max_size_ > 0) {
-    if (round(scale * im_size_max) > max_size_) {
-      scale = static_cast<double>(max_size_) / static_cast<double>(im_size_max);
-    }
-  }
-  return scale;
-}
-
-bool ResizeByShort::Run(Mat* mat, int target_size, int interp, bool use_scale,
-                        int max_size, ProcLib lib) {
-  auto r = ResizeByShort(target_size, interp, use_scale, max_size);
-  return r(mat, lib);
-}
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/resize_by_short.h b/csrcs/fastdeploy/vision/common/processors/resize_by_short.h
deleted file mode 100644
index 023748e9e..000000000
--- a/csrcs/fastdeploy/vision/common/processors/resize_by_short.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class ResizeByShort : public Processor {
- public:
-  ResizeByShort(int target_size, int interp = 1, bool use_scale = true,
-                int max_size = -1) {
-    target_size_ = target_size;
-    max_size_ = max_size;
-    interp_ = interp;
-    use_scale_ = use_scale;
-  }
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "ResizeByShort"; }
-
-  static bool Run(Mat* mat, int target_size, int interp = 1,
-                  bool use_scale = true, int max_size = -1,
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
- private:
-  double GenerateScale(const int origin_w, const int origin_h);
-  int target_size_;
-  int max_size_;
-  int interp_;
-  bool use_scale_;
-};
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/stride_pad.cc b/csrcs/fastdeploy/vision/common/processors/stride_pad.cc
deleted file mode 100644
index 8597c8375..000000000
--- a/csrcs/fastdeploy/vision/common/processors/stride_pad.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/common/processors/stride_pad.h"
-
-namespace fastdeploy {
-namespace vision {
-
-bool StridePad::CpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "StridePad: The input data must be Layout::HWC format!"
-            << std::endl;
-    return false;
-  }
-  if (mat->Channels() > 4) {
-    FDERROR << "StridePad: Only support channels <= 4." << std::endl;
-    return false;
-  }
-  if (mat->Channels() != value_.size()) {
-    FDERROR
-        << "StridePad: Require input channels equals to size of padding value, "
-           "but now channels = "
-        << mat->Channels() << ", the size of padding values = " << value_.size()
-        << "." << std::endl;
-    return false;
-  }
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-
-  int pad_h = (mat->Height() / stride_) * stride_ +
-              (mat->Height() % stride_ != 0) * stride_ - mat->Height();
-  int pad_w = (mat->Width() / stride_) * stride_ +
-              (mat->Width() % stride_ != 0) * stride_ - mat->Width();
-  if (pad_h == 0 && pad_w == 0) {
-    return true;
-  }
-  cv::Mat* im = mat->GetCpuMat();
-  cv::Scalar value;
-  if (value_.size() == 1) {
-    value = cv::Scalar(value_[0]);
-  } else if (value_.size() == 2) {
-    value = cv::Scalar(value_[0], value_[1]);
-  } else if (value_.size() == 3) {
-    value = cv::Scalar(value_[0], value_[1], value_[2]);
-  } else {
-    value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
-  }
-  // top, bottom, left, right
-  cv::copyMakeBorder(*im, *im, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT, value);
-  mat->SetHeight(origin_h + pad_h);
-  mat->SetWidth(origin_w + pad_w);
-  return true;
-}
-
-#ifdef ENABLE_OPENCV_CUDA
-bool StridePad::GpuRun(Mat* mat) {
-  if (mat->layout != Layout::HWC) {
-    FDERROR << "StridePad: The input data must be Layout::HWC format!"
-            << std::endl;
-    return false;
-  }
-  if (mat->Channels() > 4) {
-    FDERROR << "StridePad: Only support channels <= 4." << std::endl;
-    return false;
-  }
-  if (mat->Channels() != value_.size()) {
-    FDERROR
-        << "StridePad: Require input channels equals to size of padding value, "
-           "but now channels = "
-        << mat->Channels() << ", the size of padding values = " << value_.size()
-        << "." << std::endl;
-    return false;
-  }
-
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  int pad_h = (mat->Height() / stride_) * stride_ +
-              (mat->Height() % stride_ != 0) * stride_;
-  int pad_w = (mat->Width() / stride_) * stride_ +
-              (mat->Width() % stride_ != 0) * stride_;
-  if (pad_h == 0 && pad_w == 0) {
-    return true;
-  }
-
-  cv::cuda::GpuMat* im = mat->GetGpuMat();
-  cv::Scalar value;
-  if (value_.size() == 1) {
-    value = cv::Scalar(value_[0]);
-  } else if (value_.size() == 2) {
-    value = cv::Scalar(value_[0], value_[1]);
-  } else if (value_.size() == 3) {
-    value = cv::Scalar(value_[0], value_[1], value_[2]);
-  } else {
-    value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
-  }
-
-  // top, bottom, left, right
-  cv::cuda::copyMakeBorder(*im, *im, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT,
-                           value);
-  mat->SetHeight(origin_h + pad_h);
-  mat->SetWidth(origin_w + pad_w);
-  return true;
-}
-#endif
-
-bool StridePad::Run(Mat* mat, int stride, const std::vector<float>& value,
-                    ProcLib lib) {
-  auto p = StridePad(stride, value);
-  return p(mat, lib);
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/stride_pad.h b/csrcs/fastdeploy/vision/common/processors/stride_pad.h
deleted file mode 100644
index c002ca697..000000000
--- a/csrcs/fastdeploy/vision/common/processors/stride_pad.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/base.h"
-
-namespace fastdeploy {
-namespace vision {
-
-class StridePad : public Processor {
- public:
-  // only support pad with left-top padding mode
-  StridePad(int stride, const std::vector<float>& value) {
-    stride_ = stride;
-    value_ = value;
-  }
-  bool CpuRun(Mat* mat);
-#ifdef ENABLE_OPENCV_CUDA
-  bool GpuRun(Mat* mat);
-#endif
-  std::string Name() { return "StridePad"; }
-
-  static bool Run(Mat* mat, int stride,
-                  const std::vector<float>& value = std::vector<float>(),
-                  ProcLib lib = ProcLib::OPENCV_CPU);
-
- private:
-  int stride_ = 32;
-  std::vector<float> value_;
-};
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/processors/transform.h b/csrcs/fastdeploy/vision/common/processors/transform.h
deleted file mode 100644
index fed3d0c9a..000000000
--- a/csrcs/fastdeploy/vision/common/processors/transform.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/vision/common/processors/cast.h"
-#include "fastdeploy/vision/common/processors/center_crop.h"
-#include "fastdeploy/vision/common/processors/color_space_convert.h"
-#include "fastdeploy/vision/common/processors/convert.h"
-#include "fastdeploy/vision/common/processors/hwc2chw.h"
-#include "fastdeploy/vision/common/processors/normalize.h"
-#include "fastdeploy/vision/common/processors/pad.h"
-#include "fastdeploy/vision/common/processors/pad_to_size.h"
-#include "fastdeploy/vision/common/processors/resize.h"
-#include "fastdeploy/vision/common/processors/resize_by_short.h"
-#include "fastdeploy/vision/common/processors/stride_pad.h"
diff --git a/csrcs/fastdeploy/vision/common/result.cc b/csrcs/fastdeploy/vision/common/result.cc
deleted file mode 100644
index 854d6fcab..000000000
--- a/csrcs/fastdeploy/vision/common/result.cc
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-
-void ClassifyResult::Clear() {
-  std::vector<int32_t>().swap(label_ids);
-  std::vector<float>().swap(scores);
-}
-
-std::string ClassifyResult::Str() {
-  std::string out;
-  out = "ClassifyResult(\nlabel_ids: ";
-  for (size_t i = 0; i < label_ids.size(); ++i) {
-    out = out + std::to_string(label_ids[i]) + ", ";
-  }
-  out += "\nscores: ";
-  for (size_t i = 0; i < label_ids.size(); ++i) {
-    out = out + std::to_string(scores[i]) + ", ";
-  }
-  out += "\n)";
-  return out;
-}
-
-DetectionResult::DetectionResult(const DetectionResult& res) {
-  boxes.assign(res.boxes.begin(), res.boxes.end());
-  scores.assign(res.scores.begin(), res.scores.end());
-  label_ids.assign(res.label_ids.begin(), res.label_ids.end());
-}
-
-void DetectionResult::Clear() {
-  std::vector<std::array<float, 4>>().swap(boxes);
-  std::vector<float>().swap(scores);
-  std::vector<int32_t>().swap(label_ids);
-}
-
-void DetectionResult::Reserve(int size) {
-  boxes.reserve(size);
-  scores.reserve(size);
-  label_ids.reserve(size);
-}
-
-void DetectionResult::Resize(int size) {
-  boxes.resize(size);
-  scores.resize(size);
-  label_ids.resize(size);
-}
-
-std::string DetectionResult::Str() {
-  std::string out;
-  out = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]\n";
-  for (size_t i = 0; i < boxes.size(); ++i) {
-    out = out + std::to_string(boxes[i][0]) + "," +
-          std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) +
-          ", " + std::to_string(boxes[i][3]) + ", " +
-          std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]) +
-          "\n";
-  }
-  return out;
-}
-
-FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult& res) {
-  boxes.assign(res.boxes.begin(), res.boxes.end());
-  landmarks.assign(res.landmarks.begin(), res.landmarks.end());
-  scores.assign(res.scores.begin(), res.scores.end());
-  landmarks_per_face = res.landmarks_per_face;
-}
-
-void FaceDetectionResult::Clear() {
-  std::vector<std::array<float, 4>>().swap(boxes);
-  std::vector<float>().swap(scores);
-  std::vector<std::array<float, 2>>().swap(landmarks);
-  landmarks_per_face = 0;
-}
-
-void FaceDetectionResult::Reserve(int size) {
-  boxes.reserve(size);
-  scores.reserve(size);
-  if (landmarks_per_face > 0) {
-    landmarks.reserve(size * landmarks_per_face);
-  }
-}
-
-void FaceDetectionResult::Resize(int size) {
-  boxes.resize(size);
-  scores.resize(size);
-  if (landmarks_per_face > 0) {
-    landmarks.resize(size * landmarks_per_face);
-  }
-}
-
-std::string FaceDetectionResult::Str() {
-  std::string out;
-  // format without landmarks
-  if (landmarks_per_face <= 0) {
-    out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score]\n";
-    for (size_t i = 0; i < boxes.size(); ++i) {
-      out = out + std::to_string(boxes[i][0]) + "," +
-            std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) +
-            ", " + std::to_string(boxes[i][3]) + ", " +
-            std::to_string(scores[i]) + "\n";
-    }
-    return out;
-  }
-  // format with landmarks
-  FDASSERT((landmarks.size() == boxes.size() * landmarks_per_face),
-           "The size of landmarks != boxes.size * landmarks_per_face.");
-  out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x " +
-        std::to_string(landmarks_per_face) + "]\n";
-  for (size_t i = 0; i < boxes.size(); ++i) {
-    out = out + std::to_string(boxes[i][0]) + "," +
-          std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) +
-          ", " + std::to_string(boxes[i][3]) + ", " +
-          std::to_string(scores[i]) + ", ";
-    for (size_t j = 0; j < landmarks_per_face; ++j) {
-      out = out + "(" +
-            std::to_string(landmarks[i * landmarks_per_face + j][0]) + "," +
-            std::to_string(landmarks[i * landmarks_per_face + j][1]);
-      if (j < landmarks_per_face - 1) {
-        out = out + "), ";
-      } else {
-        out = out + ")\n";
-      }
-    }
-  }
-  return out;
-}
-
-void SegmentationResult::Clear() {
-  std::vector<uint8_t>().swap(label_map);
-  std::vector<float>().swap(score_map);
-  std::vector<int64_t>().swap(shape);
-  contain_score_map = false;
-}
-
-void SegmentationResult::Reserve(int size) {
-  label_map.reserve(size);
-  if (contain_score_map > 0) {
-    score_map.reserve(size);
-  }
-}
-
-void SegmentationResult::Resize(int size) {
-  label_map.resize(size);
-  if (contain_score_map) {
-    score_map.resize(size);
-  }
-}
-
-std::string SegmentationResult::Str() {
-  std::string out;
-  out = "SegmentationResult Image masks 10 rows x 10 cols: \n";
-  for (size_t i = 0; i < 10; ++i) {
-    out += "[";
-    for (size_t j = 0; j < 10; ++j) {
-      out = out + std::to_string(label_map[i * 10 + j]) + ", ";
-    }
-    out += ".....]\n";
-  }
-  out += "...........\n";
-  if (contain_score_map) {
-    out += "SegmentationResult Score map 10 rows x 10 cols: \n";
-    for (size_t i = 0; i < 10; ++i) {
-      out += "[";
-      for (size_t j = 0; j < 10; ++j) {
-        out = out + std::to_string(score_map[i * 10 + j]) + ", ";
-      }
-      out += ".....]\n";
-    }
-    out += "...........\n";
-  }
-  out += "result shape is: [" + std::to_string(shape[0]) + " " +
-         std::to_string(shape[1]) + "]";
-  return out;
-}
-
-FaceRecognitionResult::FaceRecognitionResult(const FaceRecognitionResult& res) {
-  embedding.assign(res.embedding.begin(), res.embedding.end());
-}
-
-void FaceRecognitionResult::Clear() { std::vector<float>().swap(embedding); }
-
-void FaceRecognitionResult::Reserve(int size) { embedding.reserve(size); }
-
-void FaceRecognitionResult::Resize(int size) { embedding.resize(size); }
-
-std::string FaceRecognitionResult::Str() {
-  std::string out;
-  out = "FaceRecognitionResult: [";
-  size_t numel = embedding.size();
-  if (numel <= 0) {
-    return out + "Empty Result]";
-  }
-  // max, min, mean
-  float min_val = embedding.at(0);
-  float max_val = embedding.at(0);
-  float total_val = embedding.at(0);
-  for (size_t i = 1; i < numel; ++i) {
-    float val = embedding.at(i);
-    total_val += val;
-    if (val < min_val) {
-      min_val = val;
-    }
-    if (val > max_val) {
-      max_val = val;
-    }
-  }
-  float mean_val = total_val / static_cast<float>(numel);
-  out = out + "Dim(" + std::to_string(numel) + "), " + "Min(" +
-        std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) +
-        "), " + "Mean(" + std::to_string(mean_val) + ")]\n";
-  return out;
-}
-
-MattingResult::MattingResult(const MattingResult& res) {
-  alpha.assign(res.alpha.begin(), res.alpha.end());
-  foreground.assign(res.foreground.begin(), res.foreground.end());
-  shape.assign(res.shape.begin(), res.shape.end());
-  contain_foreground = res.contain_foreground;
-}
-
-void MattingResult::Clear() {
-  std::vector<float>().swap(alpha);
-  std::vector<float>().swap(foreground);
-  std::vector<int64_t>().swap(shape);
-  contain_foreground = false;
-}
-
-void MattingResult::Reserve(int size) {
-  alpha.reserve(size);
-  if (contain_foreground) {
-    FDASSERT((shape.size() == 3),
-             "Please initial shape (h,w,c) before call Reserve.");
-    int c = static_cast<int>(shape[3]);
-    foreground.reserve(size * c);
-  }
-}
-
-void MattingResult::Resize(int size) {
-  alpha.resize(size);
-  if (contain_foreground) {
-    FDASSERT((shape.size() == 3),
-             "Please initial shape (h,w,c) before call Resize.");
-    int c = static_cast<int>(shape[3]);
-    foreground.resize(size * c);
-  }
-}
-
-std::string MattingResult::Str() {
-  std::string out;
-  out = "MattingResult[";
-  if (contain_foreground) {
-    out += "Foreground(true)";
-  } else {
-    out += "Foreground(false)";
-  }
-  out += ", Alpha(";
-  size_t numel = alpha.size();
-  if (numel <= 0) {
-    return out + "[Empty Result]";
-  }
-  // max, min, mean
-  float min_val = alpha.at(0);
-  float max_val = alpha.at(0);
-  float total_val = alpha.at(0);
-  for (size_t i = 1; i < numel; ++i) {
-    float val = alpha.at(i);
-    total_val += val;
-    if (val < min_val) {
-      min_val = val;
-    }
-    if (val > max_val) {
-      max_val = val;
-    }
-  }
-  float mean_val = total_val / static_cast<float>(numel);
-  // shape
-  std::string shape_str = "Shape(";
-  for (size_t i = 0; i < shape.size(); ++i) {
-    if ((i + 1) != shape.size()) {
-      shape_str += std::to_string(shape[i]) + ",";
-    } else {
-      shape_str += std::to_string(shape[i]) + ")";
-    }
-  }
-  out = out + "Numel(" + std::to_string(numel) + "), " + shape_str + ", Min(" +
-        std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) +
-        "), " + "Mean(" + std::to_string(mean_val) + "))]\n";
-  return out;
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/result.h b/csrcs/fastdeploy/vision/common/result.h
deleted file mode 100644
index f57178cee..000000000
--- a/csrcs/fastdeploy/vision/common/result.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "opencv2/core/core.hpp"
-
-namespace fastdeploy {
-namespace vision {
-enum FASTDEPLOY_DECL ResultType {
-  UNKNOWN_RESULT,
-  CLASSIFY,
-  DETECTION,
-  SEGMENTATION,
-  FACE_DETECTION,
-  FACE_RECOGNITION,
-  MATTING
-};
-
-struct FASTDEPLOY_DECL BaseResult {
-  ResultType type = ResultType::UNKNOWN_RESULT;
-};
-
-struct FASTDEPLOY_DECL ClassifyResult : public BaseResult {
-  std::vector<int32_t> label_ids;
-  std::vector<float> scores;
-  ResultType type = ResultType::CLASSIFY;
-
-  void Clear();
-  std::string Str();
-};
-
-struct FASTDEPLOY_DECL DetectionResult : public BaseResult {
-  // box: xmin, ymin, xmax, ymax
-  std::vector<std::array<float, 4>> boxes;
-  std::vector<float> scores;
-  std::vector<int32_t> label_ids;
-  ResultType type = ResultType::DETECTION;
-
-  DetectionResult() {}
-  DetectionResult(const DetectionResult& res);
-
-  void Clear();
-
-  void Reserve(int size);
-
-  void Resize(int size);
-
-  std::string Str();
-};
-
-struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult {
-  // box: xmin, ymin, xmax, ymax
-  std::vector<std::array<float, 4>> boxes;
-  // landmark: x, y, landmarks may empty if the
-  // model don't detect face with landmarks.
-  // Note, one face might have multiple landmarks,
-  // such as 5/19/21/68/98/..., etc.
-  std::vector<std::array<float, 2>> landmarks;
-  std::vector<float> scores;
-  ResultType type = ResultType::FACE_DETECTION;
-  // set landmarks_per_face manually in your post processes.
-  int landmarks_per_face;
-
-  FaceDetectionResult() { landmarks_per_face = 0; }
-  FaceDetectionResult(const FaceDetectionResult& res);
-
-  void Clear();
-
-  void Reserve(int size);
-
-  void Resize(int size);
-
-  std::string Str();
-};
-
-struct FASTDEPLOY_DECL SegmentationResult : public BaseResult {
-  // mask
-  std::vector<uint8_t> label_map;
-  std::vector<float> score_map;
-  std::vector<int64_t> shape;
-  bool contain_score_map = false;
-
-  ResultType type = ResultType::SEGMENTATION;
-
-  void Clear();
-
-  void Reserve(int size);
-
-  void Resize(int size);
-
-  std::string Str();
-};
-
-struct FASTDEPLOY_DECL FaceRecognitionResult : public BaseResult {
-  // face embedding vector with 128/256/512 ... dim
-  std::vector<float> embedding;
-
-  ResultType type = ResultType::FACE_RECOGNITION;
-
-  FaceRecognitionResult() {}
-  FaceRecognitionResult(const FaceRecognitionResult& res);
-
-  void Clear();
-
-  void Reserve(int size);
-
-  void Resize(int size);
-
-  std::string Str();
-};
-
-struct FASTDEPLOY_DECL MattingResult : public BaseResult {
-  // alpha matte and fgr (predicted foreground: HWC/BGR float32)
-  std::vector<float> alpha;       // h x w
-  std::vector<float> foreground;  // h x w x c (c=3 default)
-  // height, width, channel for foreground and alpha
-  // must be (h,w,c) and setup before Reserve and Resize
-  // c is only for foreground if contain_foreground is true.
-  std::vector<int64_t> shape;
-  bool contain_foreground = false;
-
-  ResultType type = ResultType::MATTING;
-
-  MattingResult() {}
-  MattingResult(const MattingResult& res);
-
-  void Clear();
-
-  void Reserve(int size);
-
-  void Resize(int size);
-
-  std::string Str();
-};
-
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc b/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc
deleted file mode 100644
index 267012c11..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc
+++ /dev/null
@@ -1,355 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/nanodet_plus.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace detection {
-
-struct NanoDetPlusCenterPoint {
-  int grid0;
-  int grid1;
-  int stride;
-};
-
-void GenerateNanoDetPlusCenterPoints(
-    const std::vector<int>& size, const std::vector<int>& downsample_strides,
-    std::vector<NanoDetPlusCenterPoint>* center_points) {
-  // size: tuple of input (width, height), e.g (320, 320)
-  // downsample_strides: downsample strides in NanoDet and
-  // NanoDet-Plus, e.g (8, 16, 32, 64)
-  const int width = size[0];
-  const int height = size[1];
-  for (const auto& ds : downsample_strides) {
-    int num_grid_w = width / ds;
-    int num_grid_h = height / ds;
-    for (int g1 = 0; g1 < num_grid_h; ++g1) {
-      for (int g0 = 0; g0 < num_grid_w; ++g0) {
-        (*center_points).emplace_back(NanoDetPlusCenterPoint{g0, g1, ds});
-      }
-    }
-  }
-}
-
-void WrapAndResize(Mat* mat, std::vector<int> size, std::vector<float> color,
-                   bool keep_ratio = false) {
-  // Reference: nanodet/data/transform/warp.py#L139
-  // size: tuple of input (width, height)
-  // The default value of `keep_ratio` is `fasle` in
-  // `config/nanodet-plus-m-1.5x_320.yml` for both
-  // train and val processes. So, we just let this
-  // option default `false` according to the official
-  // implementation in NanoDet and NanoDet-Plus.
-  // Note, this function will apply a normal resize
-  // operation to input Mat if the keep_ratio option
-  // is fasle and the behavior will be the same as
-  // yolov5's letterbox if keep_ratio is true.
-
-  // with keep_ratio = false (default)
-  if (!keep_ratio) {
-    int resize_h = size[1];
-    int resize_w = size[0];
-    if (resize_h != mat->Height() || resize_w != mat->Width()) {
-      Resize::Run(mat, resize_w, resize_h);
-    }
-    return;
-  }
-  // with keep_ratio = true, same as yolov5's letterbox
-  float r = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                     size[0] * 1.0f / static_cast<float>(mat->Width()));
-
-  int resize_h = int(round(static_cast<float>(mat->Height()) * r));
-  int resize_w = int(round(static_cast<float>(mat->Width()) * r));
-
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-void GFLRegression(const float* logits, size_t reg_num, float* offset) {
-  // Hint: reg_num = reg_max + 1
-  FDASSERT(((nullptr != logits) && (reg_num != 0)),
-           "NanoDetPlus: logits is nullptr or reg_num is 0 in GFLRegression.");
-  // softmax
-  float total_exp = 0.f;
-  std::vector<float> softmax_probs(reg_num);
-  for (size_t i = 0; i < reg_num; ++i) {
-    softmax_probs[i] = std::exp(logits[i]);
-    total_exp += softmax_probs[i];
-  }
-  for (size_t i = 0; i < reg_num; ++i) {
-    softmax_probs[i] = softmax_probs[i] / total_exp;
-  }
-  // gfl regression -> offset
-  for (size_t i = 0; i < reg_num; ++i) {
-    (*offset) += static_cast<float>(i) * softmax_probs[i];
-  }
-}
-
-NanoDetPlus::NanoDetPlus(const std::string& model_file,
-                         const std::string& params_file,
-                         const RuntimeOption& custom_option,
-                         const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool NanoDetPlus::Initialize() {
-  // parameters for preprocess
-  size = {320, 320};
-  padding_value = {0.0f, 0.0f, 0.0f};
-  keep_ratio = false;
-  downsample_strides = {8, 16, 32, 64};
-  max_wh = 4096.0f;
-  reg_max = 7;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  return true;
-}
-
-bool NanoDetPlus::Preprocess(
-    Mat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
-  // NanoDet-Plus preprocess steps
-  // 1. WrapAndResize
-  // 2. HWC->CHW
-  // 3. Normalize or Convert (keep BGR order)
-  WrapAndResize(mat, size, padding_value, keep_ratio);
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  // Compute `result = mat * alpha + beta` directly by channel
-  // Reference: /config/nanodet-plus-m-1.5x_320.yml#L89
-  // from mean: [103.53, 116.28, 123.675], std: [57.375, 57.12, 58.395]
-  // x' = (x - mean) / std to x'= x * alpha + beta.
-  // e.g alpha[0] = 0.017429f = 1.0f / 57.375f
-  // e.g beta[0] = -103.53f * 0.0174291f
-  std::vector<float> alpha = {0.017429f, 0.017507f, 0.017125f};
-  std::vector<float> beta = {-103.53f * 0.0174291f, -116.28f * 0.0175070f,
-                             -123.675f * 0.0171247f};  // BGR order
-  Convert::Run(mat, alpha, beta);
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool NanoDetPlus::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  // generate center points with dowmsample strides
-  std::vector<NanoDetPlusCenterPoint> center_points;
-  GenerateNanoDetPlusCenterPoints(size, downsample_strides, &center_points);
-
-  // infer_result shape might look like (1,2125,112)
-  const int num_cls_reg = infer_result.shape[2];            // e.g 112
-  const int num_classes = num_cls_reg - (reg_max + 1) * 4;  // e.g 80
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    float* scores = data + i * num_cls_reg;
-    float* max_class_score = std::max_element(scores, scores + num_classes);
-    float confidence = (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(scores, max_class_score);
-    // fetch i-th center point
-    float grid0 = static_cast<float>(center_points.at(i).grid0);
-    float grid1 = static_cast<float>(center_points.at(i).grid1);
-    float downsample_stride = static_cast<float>(center_points.at(i).stride);
-    // apply gfl regression to get offsets (l,t,r,b)
-    float* logits = data + i * num_cls_reg + num_classes;  // 32|44...
-    std::vector<float> offsets(4);
-    for (size_t j = 0; j < 4; ++j) {
-      GFLRegression(logits + j * (reg_max + 1), reg_max + 1, &offsets[j]);
-    }
-    // convert from offsets to [x1, y1, x2, y2]
-    float l = offsets[0];  // left
-    float t = offsets[1];  // top
-    float r = offsets[2];  // right
-    float b = offsets[3];  // bottom
-
-    float x1 = (grid0 - l) * downsample_stride;  // cx - l x1
-    float y1 = (grid1 - t) * downsample_stride;  // cy - t y1
-    float x2 = (grid0 + r) * downsample_stride;  // cx + r x2
-    float y2 = (grid1 + b) * downsample_stride;  // cy + b y2
-
-    result->boxes.emplace_back(
-        std::array<float, 4>{x1 + label_id * max_wh, y1 + label_id * max_wh,
-                             x2 + label_id * max_wh, y2 + label_id * max_wh});
-    // label_id * max_wh for multi classes NMS
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  // without keep_ratio
-  if (!keep_ratio) {
-    // x' = (x / out_w) * ipt_w = x / (out_w / ipt_w)
-    // y' = (y / out_h) * ipt_h = y / (out_h / ipt_h)
-    float r_w = out_w / ipt_w;
-    float r_h = out_h / ipt_h;
-    for (size_t i = 0; i < result->boxes.size(); ++i) {
-      int32_t label_id = (result->label_ids)[i];
-      // clip box
-      result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-      result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-      result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-      result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-      result->boxes[i][0] = std::max(result->boxes[i][0] / r_w, 0.0f);
-      result->boxes[i][1] = std::max(result->boxes[i][1] / r_h, 0.0f);
-      result->boxes[i][2] = std::max(result->boxes[i][2] / r_w, 0.0f);
-      result->boxes[i][3] = std::max(result->boxes[i][3] / r_h, 0.0f);
-      result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-      result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-      result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-      result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-    }
-    return true;
-  }
-  // with keep_ratio
-  float r = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * r) / 2;
-  float pad_w = (out_w - ipt_w * r) / 2;
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / r, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / r, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / r, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / r, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool NanoDetPlus::Predict(cv::Mat* im, DetectionResult* result,
-                          float conf_threshold, float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h b/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h
deleted file mode 100644
index a407b8715..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace detection {
-
-class FASTDEPLOY_DECL NanoDetPlus : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  NanoDetPlus(const std::string& model_file,
-              const std::string& params_file = "",
-              const RuntimeOption& custom_option = RuntimeOption(),
-              const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "nanodet"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.35f,
-                       float nms_iou_threshold = 0.5f);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of input size (width, height), e.g (320, 320)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // keep aspect ratio or not when perform resize operation.
-  // This option is set as `false` by default in NanoDet-Plus.
-  bool keep_ratio;
-  // downsample strides for NanoDet-Plus to generate anchors, will
-  // take (8, 16, 32, 64) as default values.
-  std::vector<int> downsample_strides;
-  // for offseting the boxes by classes when using NMS, default 4096.
-  float max_wh;
-  // reg_max for GFL regression, default 7
-  int reg_max;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
-  bool IsDynamicInput() const { return is_dynamic_input_; }
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // RangiLyu/nanodet official 'export_onnx.py' script will export static ONNX
-  // by default.
-  // This value will auto check by fastdeploy after the internal Runtime
-  // initialized.
-  bool is_dynamic_input_;
-};
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc
deleted file mode 100644
index b415c0b3b..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindNanoDetPlus(pybind11::module& m) {
-  pybind11::class_<vision::detection::NanoDetPlus, FastDeployModel>(
-      m, "NanoDetPlus")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::NanoDetPlus& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::NanoDetPlus::size)
-      .def_readwrite("padding_value",
-                     &vision::detection::NanoDetPlus::padding_value)
-      .def_readwrite("keep_ratio", &vision::detection::NanoDetPlus::keep_ratio)
-      .def_readwrite("downsample_strides",
-                     &vision::detection::NanoDetPlus::downsample_strides)
-      .def_readwrite("max_wh", &vision::detection::NanoDetPlus::max_wh)
-      .def_readwrite("reg_max", &vision::detection::NanoDetPlus::reg_max);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc
deleted file mode 100644
index dff2118f3..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/scaledyolov4.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-void ScaledYOLOv4::LetterBox(Mat* mat, const std::vector<int>& size,
-                             const std::vector<float>& color, bool _auto,
-                             bool scale_fill, bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file,
-                           const std::string& params_file,
-                           const RuntimeOption& custom_option,
-                           const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER};
-    valid_gpu_backends = {Backend::PDINFER};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool ScaledYOLOv4::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  max_wh = 7680.0;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-  return true;
-}
-
-bool ScaledYOLOv4::Preprocess(
-    Mat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
-  // process after image load
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(mat->Height() * ratio);
-    int resize_w = int(mat->Width() * ratio);
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // ScaledYOLOv4's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  ScaledYOLOv4::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad,
-                          is_scale_up, stride);
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
-  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool ScaledYOLOv4::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-        data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * scale) / 2.0f;
-  float pad_w = (out_w - ipt_w * scale) / 2.0f;
-  if (is_mini_pad) {
-    // 和 LetterBox中_auto=true的处理逻辑对应
-    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
-    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
-  }
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result,
-                           float conf_threshold, float nms_iou_threshold) {
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h b/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h
deleted file mode 100644
index bb7ff0a28..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL ScaledYOLOv4 : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  ScaledYOLOv4(const std::string& model_file,
-               const std::string& params_file = "",
-               const RuntimeOption& custom_option = RuntimeOption(),
-               const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  virtual std::string ModelName() const { return "ScaledYOLOv4"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.25,
-                       float nms_iou_threshold = 0.5);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS
-  float max_wh;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 对图片进行LetterBox处理
-  // mat 为读取到的原图
-  // size 为输入模型的图像尺寸
-  void LetterBox(Mat* mat, const std::vector<int>& size,
-                 const std::vector<float>& color, bool _auto,
-                 bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This
-  // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
-  bool is_dynamic_input_;
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc
deleted file mode 100644
index 3e8e43b9e..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindScaledYOLOv4(pybind11::module& m) {
-  pybind11::class_<vision::detection::ScaledYOLOv4, FastDeployModel>(
-      m, "ScaledYOLOv4")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::ScaledYOLOv4& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::ScaledYOLOv4::size)
-      .def_readwrite("padding_value",
-                     &vision::detection::ScaledYOLOv4::padding_value)
-      .def_readwrite("is_mini_pad",
-                     &vision::detection::ScaledYOLOv4::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::detection::ScaledYOLOv4::is_no_pad)
-      .def_readwrite("is_scale_up",
-                     &vision::detection::ScaledYOLOv4::is_scale_up)
-      .def_readwrite("stride", &vision::detection::ScaledYOLOv4::stride)
-      .def_readwrite("max_wh", &vision::detection::ScaledYOLOv4::max_wh);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolor.cc b/csrcs/fastdeploy/vision/detection/contrib/yolor.cc
deleted file mode 100644
index 5e6fa2fdd..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolor.cc
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolor.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-void YOLOR::LetterBox(Mat* mat, const std::vector<int>& size,
-                      const std::vector<float>& color, bool _auto,
-                      bool scale_fill, bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOR::YOLOR(const std::string& model_file, const std::string& params_file,
-             const RuntimeOption& custom_option, const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER};
-    valid_gpu_backends = {Backend::PDINFER};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool YOLOR::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  max_wh = 7680.0;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-  return true;
-}
-
-bool YOLOR::Preprocess(Mat* mat, FDTensor* output,
-                       std::map<std::string, std::array<float, 2>>* im_info) {
-  // process after image load
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(mat->Height() * ratio);
-    int resize_w = int(mat->Width() * ratio);
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // yolor's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  YOLOR::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad,
-                   is_scale_up, stride);
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
-  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool YOLOR::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-        data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * scale) / 2.0f;
-  float pad_w = (out_w - ipt_w * scale) / 2.0f;
-  if (is_mini_pad) {
-    // 和 LetterBox中_auto=true的处理逻辑对应
-    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
-    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
-  }
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOR::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
-                    float nms_iou_threshold) {
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-
-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolor.h b/csrcs/fastdeploy/vision/detection/contrib/yolor.h
deleted file mode 100644
index 2de7a456f..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolor.h
+++ /dev/null
@@ -1,102 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL YOLOR : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  YOLOR(const std::string& model_file, const std::string& params_file = "",
-        const RuntimeOption& custom_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  virtual std::string ModelName() const { return "YOLOR"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.25,
-                       float nms_iou_threshold = 0.5);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS
-  float max_wh;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 对图片进行LetterBox处理
-  // mat 为读取到的原图
-  // size 为输入模型的图像尺寸
-  void LetterBox(Mat* mat, const std::vector<int>& size,
-                 const std::vector<float>& color, bool _auto,
-                 bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This
-  // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
-  bool is_dynamic_input_;
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc
deleted file mode 100644
index 0e0a21ca5..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindYOLOR(pybind11::module& m) {
-  pybind11::class_<vision::detection::YOLOR, FastDeployModel>(m, "YOLOR")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::YOLOR& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::YOLOR::size)
-      .def_readwrite("padding_value", &vision::detection::YOLOR::padding_value)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOR::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOR::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOR::is_scale_up)
-      .def_readwrite("stride", &vision::detection::YOLOR::stride)
-      .def_readwrite("max_wh", &vision::detection::YOLOR::max_wh);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5.cc
deleted file mode 100644
index 306051e80..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov5.cc
+++ /dev/null
@@ -1,295 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolov5.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-void YOLOv5::LetterBox(Mat* mat, std::vector<int> size,
-                       std::vector<float> color, bool _auto, bool scale_fill,
-                       bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  Resize::Run(mat, resize_w, resize_h);
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,
-               const RuntimeOption& custom_option,
-               const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool YOLOv5::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  max_wh = 7680.0;
-  multi_label = true;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-  return true;
-}
-
-bool YOLOv5::Preprocess(Mat* mat, FDTensor* output,
-                        std::map<std::string, std::array<float, 2>>* im_info) {
-  // process after image load
-  double ratio = (size[0] * 1.0) / std::max(static_cast<float>(mat->Height()),
-                                            static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(mat->Height() * ratio);
-    int resize_w = int(mat->Width() * ratio);
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // yolov5's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up,
-            stride);
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
-  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool YOLOv5::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold, bool multi_label) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  if (multi_label) {
-    result->Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5));
-  } else {
-    result->Reserve(infer_result.shape[1]);
-  }
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    if (multi_label) {
-      for (size_t j = 5; j < infer_result.shape[2]; ++j) {
-        confidence = data[s + 4];
-        float* class_score = data + s + j;
-        confidence *= (*class_score);
-        // filter boxes by conf_threshold
-        if (confidence <= conf_threshold) {
-          continue;
-        }
-        int32_t label_id = std::distance(data + s + 5, class_score);
-
-        // convert from [x, y, w, h] to [x1, y1, x2, y2]
-        result->boxes.emplace_back(std::array<float, 4>{
-            data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-            data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-            data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-            data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-        result->label_ids.push_back(label_id);
-        result->scores.push_back(confidence);
-      }
-    } else {
-      float* max_class_score =
-          std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-      confidence *= (*max_class_score);
-      // filter boxes by conf_threshold
-      if (confidence <= conf_threshold) {
-        continue;
-      }
-      int32_t label_id = std::distance(data + s + 5, max_class_score);
-      // convert from [x, y, w, h] to [x1, y1, x2, y2]
-      result->boxes.emplace_back(std::array<float, 4>{
-          data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-          data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-          data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-          data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-      result->label_ids.push_back(label_id);
-      result->scores.push_back(confidence);
-    }
-  }
-
-  if (result->boxes.size() == 0) {
-    return true;
-  }
-
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    float pad_h = (out_h - ipt_h * scale) / 2;
-    float pad_w = (out_w - ipt_w * scale) / 2;
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h);
-  }
-  return true;
-}
-
-bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
-                     float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold, multi_label)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5.h b/csrcs/fastdeploy/vision/detection/contrib/yolov5.h
deleted file mode 100644
index 68c910d23..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov5.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  YOLOv5(const std::string& model_file, const std::string& params_file = "",
-         const RuntimeOption& custom_option = RuntimeOption(),
-         const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "yolov5"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.25,
-                       float nms_iou_threshold = 0.5);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS
-  float max_wh;
-  // for different strategies to get boxes when postprocessing
-  bool multi_label;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* outputs,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  // multi_label 后处理时box选取是否采用多标签方式
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold,
-                   bool multi_label);
-
-  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
-  bool IsDynamicInput() const { return is_dynamic_input_; }
-
-  void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
-                 bool _auto, bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by
-  // default.
-  // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This
-  // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
-  bool is_dynamic_input_;
-};
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc
deleted file mode 100644
index 65ba538b8..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindYOLOv5(pybind11::module& m) {
-  pybind11::class_<vision::detection::YOLOv5, FastDeployModel>(m, "YOLOv5")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::YOLOv5& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::YOLOv5::size)
-      .def_readwrite("padding_value", &vision::detection::YOLOv5::padding_value)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up)
-      .def_readwrite("stride", &vision::detection::YOLOv5::stride)
-      .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh)
-      .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc
deleted file mode 100644
index 26ca15f1e..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc
+++ /dev/null
@@ -1,399 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolov5lite.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-void YOLOv5Lite::LetterBox(Mat* mat, const std::vector<int>& size,
-                           const std::vector<float>& color, bool _auto,
-                           bool scale_fill, bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-void YOLOv5Lite::GenerateAnchors(const std::vector<int>& size,
-                                 const std::vector<int>& downsample_strides,
-                                 std::vector<Anchor>* anchors,
-                                 int num_anchors) {
-  // size: tuple of input (width, height)
-  // downsample_strides: downsample strides in YOLOv5Lite, e.g (8,16,32)
-  const int width = size[0];
-  const int height = size[1];
-  for (int i = 0; i < downsample_strides.size(); ++i) {
-    const int ds = downsample_strides[i];
-    int num_grid_w = width / ds;
-    int num_grid_h = height / ds;
-    for (int an = 0; an < num_anchors; ++an) {
-      float anchor_w = anchor_config[i][an * 2];
-      float anchor_h = anchor_config[i][an * 2 + 1];
-      for (int g1 = 0; g1 < num_grid_h; ++g1) {
-        for (int g0 = 0; g0 < num_grid_w; ++g0) {
-          (*anchors).emplace_back(Anchor{g0, g1, ds, anchor_w, anchor_h});
-        }
-      }
-    }
-  }
-}
-
-YOLOv5Lite::YOLOv5Lite(const std::string& model_file,
-                       const std::string& params_file,
-                       const RuntimeOption& custom_option,
-                       const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool YOLOv5Lite::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  downsample_strides = {8, 16, 32};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  max_wh = 7680.0;
-  is_decode_exported = false;
-  anchor_config = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0},
-                   {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},
-                   {116.0, 90.0, 156.0, 198.0, 373.0, 326.0}};
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-  return true;
-}
-
-bool YOLOv5Lite::Preprocess(
-    Mat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
-  // process after image load
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(mat->Height() * ratio);
-    int resize_w = int(mat->Width() * ratio);
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // yolov5lite's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  YOLOv5Lite::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad,
-                        is_scale_up, stride);
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
-  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool YOLOv5Lite::PostprocessWithDecode(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  // generate anchors with dowmsample strides
-  std::vector<YOLOv5Lite::Anchor> anchors;
-  int num_anchors = anchor_config[0].size() / 2;
-  GenerateAnchors(size, downsample_strides, &anchors, num_anchors);
-  // infer_result shape might look like (1,n,85=5+80)
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // fetch i-th anchor
-    float grid0 = static_cast<float>(anchors.at(i).grid0);
-    float grid1 = static_cast<float>(anchors.at(i).grid1);
-    float downsample_stride = static_cast<float>(anchors.at(i).stride);
-    float anchor_w = static_cast<float>(anchors.at(i).anchor_w);
-    float anchor_h = static_cast<float>(anchors.at(i).anchor_h);
-    // convert from offsets to [x, y, w, h]
-    float dx = data[s];
-    float dy = data[s + 1];
-    float dw = data[s + 2];
-    float dh = data[s + 3];
-
-    float x = (dx * 2.0f - 0.5f + grid0) * downsample_stride;
-    float y = (dy * 2.0f - 0.5f + grid1) * downsample_stride;
-    float w = std::pow(dw * 2.0f, 2.0f) * anchor_w;
-    float h = std::pow(dh * 2.0f, 2.0f) * anchor_h;
-
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh,
-        x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh});
-    // label_id * max_wh for multi classes NMS
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * scale) / 2.0f;
-  float pad_w = (out_w - ipt_w * scale) / 2.0f;
-  if (is_mini_pad) {
-    // 和 LetterBox中_auto=true的处理逻辑对应
-    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
-    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
-  }
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOv5Lite::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-        data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * scale) / 2.0f;
-  float pad_w = (out_w - ipt_w * scale) / 2.0f;
-  if (is_mini_pad) {
-    // 和 LetterBox中_auto=true的处理逻辑对应
-    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
-    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
-  }
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOv5Lite::Predict(cv::Mat* im, DetectionResult* result,
-                         float conf_threshold, float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (is_decode_exported) {
-    if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                     nms_iou_threshold)) {
-      FDERROR << "Failed to post process." << std::endl;
-      return false;
-    }
-  } else {
-    if (!PostprocessWithDecode(output_tensors[0], result, im_info,
-                               conf_threshold, nms_iou_threshold)) {
-      FDERROR << "Failed to post process." << std::endl;
-      return false;
-    }
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h b/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h
deleted file mode 100644
index 2add202f4..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  YOLOv5Lite(const std::string& model_file, const std::string& params_file = "",
-             const RuntimeOption& custom_option = RuntimeOption(),
-             const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  virtual std::string ModelName() const { return "YOLOv5-Lite"; }
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.45,
-                       float nms_iou_threshold = 0.25);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS
-  float max_wh;
-  // downsample strides for YOLOv5Lite to generate anchors, will take
-  // (8,16,32) as default values, might have stride=64.
-  std::vector<int> downsample_strides;
-  // anchors parameters, downsample_strides will take
-  // (8,16,32), each stride has three anchors with width and hight.
-  std::vector<std::vector<float>> anchor_config;
-  // whether the model_file was exported with decode module. The official
-  // YOLOv5Lite/export.py script will export ONNX file without
-  // decode module. Please set it 'true' manually if the model file
-  // was exported with decode module.
-  // false : ONNX files without decode module.
-  // true : ONNX file with decode module.
-  bool is_decode_exported;
-
- private:
-  // necessary parameters for GenerateAnchors to generate anchors when ONNX file
-  // without decode module.
-  struct Anchor {
-    int grid0;
-    int grid1;
-    int stride;
-    float anchor_w;
-    float anchor_h;
-  };
-
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // YOLOv5Lite的官方脚本默认导出不带decode模块的模型文件 需要在后处理进行decode
-  // the official YOLOv5Lite/export.py will export ONNX file without decode
-  // module.
-  // this fuction support the postporocess for ONNX file without decode module.
-  // set the `is_decode_exported = false`, this function will work.
-  bool PostprocessWithDecode(
-      FDTensor& infer_result, DetectionResult* result,
-      const std::map<std::string, std::array<float, 2>>& im_info,
-      float conf_threshold, float nms_iou_threshold);
-
-  // 对图片进行LetterBox处理
-  // mat 为读取到的原图
-  // size 为输入模型的图像尺寸
-  void LetterBox(Mat* mat, const std::vector<int>& size,
-                 const std::vector<float>& color, bool _auto,
-                 bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
-  // generate anchors for decodeing when ONNX file without decode module.
-  void GenerateAnchors(const std::vector<int>& size,
-                       const std::vector<int>& downsample_strides,
-                       std::vector<Anchor>* anchors, const int num_anchors = 3);
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This
-  // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
-  bool is_dynamic_input_;
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc
deleted file mode 100644
index dd064e3be..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindYOLOv5Lite(pybind11::module& m) {
-  pybind11::class_<vision::detection::YOLOv5Lite, FastDeployModel>(m,
-                                                                   "YOLOv5Lite")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::YOLOv5Lite& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::YOLOv5Lite::size)
-      .def_readwrite("padding_value",
-                     &vision::detection::YOLOv5Lite::padding_value)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOv5Lite::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOv5Lite::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOv5Lite::is_scale_up)
-      .def_readwrite("stride", &vision::detection::YOLOv5Lite::stride)
-      .def_readwrite("max_wh", &vision::detection::YOLOv5Lite::max_wh)
-      .def_readwrite("anchor_config",
-                     &vision::detection::YOLOv5Lite::anchor_config)
-      .def_readwrite("is_decode_exported",
-                     &vision::detection::YOLOv5Lite::is_decode_exported);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov6.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov6.cc
deleted file mode 100644
index 7c6827433..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov6.cc
+++ /dev/null
@@ -1,267 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolov6.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace detection {
-
-void YOLOv6::LetterBox(Mat* mat, std::vector<int> size,
-                       std::vector<float> color, bool _auto, bool scale_fill,
-                       bool scale_up, int stride) {
-  float scale = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(static_cast<float>(mat->Height()) * scale));
-  int resize_w = int(round(static_cast<float>(mat->Width()) * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOv6::YOLOv6(const std::string& model_file, const std::string& params_file,
-               const RuntimeOption& custom_option,
-               const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool YOLOv6::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  max_wh = 4096.0f;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-  return true;
-}
-
-bool YOLOv6::Preprocess(Mat* mat, FDTensor* output,
-                        std::map<std::string, std::array<float, 2>>* im_info) {
-  // process after image load
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(round(static_cast<float>(mat->Height()) * ratio));
-    int resize_w = int(round(static_cast<float>(mat->Width()) * ratio));
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // yolov6's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up,
-            stride);
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
-  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool YOLOv6::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-        data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    float pad_h = (out_h - ipt_h * scale) / 2;
-    float pad_w = (out_w - ipt_w * scale) / 2;
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOv6::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
-                     float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov6.h b/csrcs/fastdeploy/vision/detection/contrib/yolov6.h
deleted file mode 100644
index 64af6e2eb..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov6.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace detection {
-
-class FASTDEPLOY_DECL YOLOv6 : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  YOLOv6(const std::string& model_file, const std::string& params_file = "",
-         const RuntimeOption& custom_option = RuntimeOption(),
-         const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "YOLOv6"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.25,
-                       float nms_iou_threshold = 0.5);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS, default 4096 in
-  // meituan/YOLOv6
-  float max_wh;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* outputs,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
-  bool IsDynamicInput() const { return is_dynamic_input_; }
-
-  void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
-                 bool _auto, bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // meituan/YOLOv6 official 'export_onnx.py' script will export static ONNX by
-  // default.
-  // while is_dynamic_input if 'false', is_mini_pad will force 'false'. This
-  // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
-  bool is_dynamic_input_;
-};
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc
deleted file mode 100644
index a1d0131df..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindYOLOv6(pybind11::module& m) {
-  pybind11::class_<vision::detection::YOLOv6, FastDeployModel>(m, "YOLOv6")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::YOLOv6& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::YOLOv6::size)
-      .def_readwrite("padding_value", &vision::detection::YOLOv6::padding_value)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOv6::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOv6::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOv6::is_scale_up)
-      .def_readwrite("stride", &vision::detection::YOLOv6::stride)
-      .def_readwrite("max_wh", &vision::detection::YOLOv6::max_wh);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov7.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov7.cc
deleted file mode 100644
index edc1b9048..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov7.cc
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolov7.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-void YOLOv7::LetterBox(Mat* mat, const std::vector<int>& size,
-                       const std::vector<float>& color, bool _auto,
-                       bool scale_fill, bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file,
-               const RuntimeOption& custom_option,
-               const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER};
-    valid_gpu_backends = {Backend::PDINFER};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  initialized = Initialize();
-}
-
-bool YOLOv7::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  max_wh = 7680.0;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-  return true;
-}
-
-bool YOLOv7::Preprocess(Mat* mat, FDTensor* output,
-                        std::map<std::string, std::array<float, 2>>* im_info) {
-  // process after image load
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(mat->Height() * ratio);
-    int resize_w = int(mat->Width() * ratio);
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // yolov7's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  YOLOv7::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad,
-                    is_scale_up, stride);
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
-  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool YOLOv7::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-        data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * scale) / 2.0f;
-  float pad_w = (out_w - ipt_w * scale) / 2.0f;
-  if (is_mini_pad) {
-    // 和 LetterBox中_auto=true的处理逻辑对应
-    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
-    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
-  }
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
-                     float nms_iou_threshold) {
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-
-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov7.h b/csrcs/fastdeploy/vision/detection/contrib/yolov7.h
deleted file mode 100644
index 02b874b2c..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov7.h
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel {
- public:
-  YOLOv7(const std::string& model_file, const std::string& params_file = "",
-         const RuntimeOption& custom_option = RuntimeOption(),
-         const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  virtual std::string ModelName() const { return "yolov7"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.25,
-                       float nms_iou_threshold = 0.5);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS
-  float max_wh;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 对图片进行LetterBox处理
-  // mat 为读取到的原图
-  // size 为输入模型的图像尺寸
-  void LetterBox(Mat* mat, const std::vector<int>& size,
-                 const std::vector<float>& color, bool _auto,
-                 bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This
-  // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
-  bool is_dynamic_input_;
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc
deleted file mode 100644
index bf196fa9f..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindYOLOv7(pybind11::module& m) {
-  pybind11::class_<vision::detection::YOLOv7, FastDeployModel>(m, "YOLOv7")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::YOLOv7& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::YOLOv7::size)
-      .def_readwrite("padding_value", &vision::detection::YOLOv7::padding_value)
-      .def_readwrite("is_mini_pad", &vision::detection::YOLOv7::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::detection::YOLOv7::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::detection::YOLOv7::is_scale_up)
-      .def_readwrite("stride", &vision::detection::YOLOv7::stride)
-      .def_readwrite("max_wh", &vision::detection::YOLOv7::max_wh);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolox.cc b/csrcs/fastdeploy/vision/detection/contrib/yolox.cc
deleted file mode 100644
index 5d3880657..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolox.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/contrib/yolox.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace detection {
-
-struct YOLOXAnchor {
-  int grid0;
-  int grid1;
-  int stride;
-};
-
-void GenerateYOLOXAnchors(const std::vector<int>& size,
-                          const std::vector<int>& downsample_strides,
-                          std::vector<YOLOXAnchor>* anchors) {
-  // size: tuple of input (width, height)
-  // downsample_strides: downsample strides in YOLOX, e.g (8,16,32)
-  const int width = size[0];
-  const int height = size[1];
-  for (const auto& ds : downsample_strides) {
-    int num_grid_w = width / ds;
-    int num_grid_h = height / ds;
-    for (int g1 = 0; g1 < num_grid_h; ++g1) {
-      for (int g0 = 0; g0 < num_grid_w; ++g0) {
-        (*anchors).emplace_back(YOLOXAnchor{g0, g1, ds});
-      }
-    }
-  }
-}
-
-void LetterBoxWithRightBottomPad(Mat* mat, std::vector<int> size,
-                                 std::vector<float> color) {
-  // specific pre process for YOLOX, not the same as YOLOv5
-  // reference: YOLOX/yolox/data/data_augment.py#L142
-  float r = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                     size[0] * 1.0f / static_cast<float>(mat->Width()));
-
-  int resize_h = int(round(static_cast<float>(mat->Height()) * r));
-  int resize_w = int(round(static_cast<float>(mat->Width()) * r));
-
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  // right-bottom padding for YOLOX
-  if (pad_h > 0 || pad_w > 0) {
-    int top = 0;
-    int left = 0;
-    int right = pad_w;
-    int bottom = pad_h;
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOX::YOLOX(const std::string& model_file, const std::string& params_file,
-             const RuntimeOption& custom_option, const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool YOLOX::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  downsample_strides = {8, 16, 32};
-  max_wh = 4096.0f;
-  is_decode_exported = false;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  return true;
-}
-
-bool YOLOX::Preprocess(Mat* mat, FDTensor* output,
-                       std::map<std::string, std::array<float, 2>>* im_info) {
-  // YOLOX ( >= v0.1.1) preprocess steps
-  // 1. preproc
-  // 2. HWC->CHW
-  // 3. NO!!! BRG2GRB and Normalize needed in YOLOX
-  LetterBoxWithRightBottomPad(mat, size, padding_value);
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool YOLOX::Postprocess(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        data[s] - data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh,
-        data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh,
-        data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh});
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float r = std::min(out_h / ipt_h, out_w / ipt_w);
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f);
-    result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f);
-    result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f);
-    result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOX::PostprocessWithDecode(
-    FDTensor& infer_result, DetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  result->Clear();
-  result->Reserve(infer_result.shape[1]);
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  // generate anchors with dowmsample strides
-  std::vector<YOLOXAnchor> anchors;
-  GenerateYOLOXAnchors(size, downsample_strides, &anchors);
-
-  // infer_result shape might look like (1,n,85=5+80)
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    int s = i * infer_result.shape[2];
-    float confidence = data[s + 4];
-    float* max_class_score =
-        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
-    confidence *= (*max_class_score);
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    int32_t label_id = std::distance(data + s + 5, max_class_score);
-    // fetch i-th anchor
-    float grid0 = static_cast<float>(anchors.at(i).grid0);
-    float grid1 = static_cast<float>(anchors.at(i).grid1);
-    float downsample_stride = static_cast<float>(anchors.at(i).stride);
-    // convert from offsets to [x, y, w, h]
-    float dx = data[s];
-    float dy = data[s + 1];
-    float dw = data[s + 2];
-    float dh = data[s + 3];
-
-    float x = (dx + grid0) * downsample_stride;
-    float y = (dy + grid1) * downsample_stride;
-    float w = std::exp(dw) * downsample_stride;
-    float h = std::exp(dh) * downsample_stride;
-
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh,
-        x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh});
-    // label_id * max_wh for multi classes NMS
-    result->label_ids.push_back(label_id);
-    result->scores.push_back(confidence);
-  }
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float r = std::min(out_h / ipt_h, out_w / ipt_w);
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    int32_t label_id = (result->label_ids)[i];
-    // clip box
-    result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id;
-    result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id;
-    result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id;
-    result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id;
-    result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f);
-    result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f);
-    result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f);
-    result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOX::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
-                    float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (is_decode_exported) {
-    if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                     nms_iou_threshold)) {
-      FDERROR << "Failed to post process." << std::endl;
-      return false;
-    }
-  } else {
-    if (!PostprocessWithDecode(output_tensors[0], result, im_info,
-                               conf_threshold, nms_iou_threshold)) {
-      FDERROR << "Failed to post process." << std::endl;
-      return false;
-    }
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolox.h b/csrcs/fastdeploy/vision/detection/contrib/yolox.h
deleted file mode 100644
index fc27ca1ed..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolox.h
+++ /dev/null
@@ -1,107 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace detection {
-
-class FASTDEPLOY_DECL YOLOX : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  YOLOX(const std::string& model_file, const std::string& params_file = "",
-        const RuntimeOption& custom_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "YOLOX"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, DetectionResult* result,
-                       float conf_threshold = 0.25,
-                       float nms_iou_threshold = 0.5);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // whether the model_file was exported with decode module. The official
-  // YOLOX/tools/export_onnx.py script will export ONNX file without
-  // decode module. Please set it 'true' manually if the model file
-  // was exported with decode module.
-  bool is_decode_exported;
-  // downsample strides for YOLOX to generate anchors, will take
-  // (8,16,32) as default values, might have stride=64.
-  std::vector<int> downsample_strides;
-  // for offseting the boxes by classes when using NMS, default 4096.
-  float max_wh;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* outputs,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, DetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // YOLOX的官方脚本默认导出不带decode模块的模型文件 需要在后处理进行decode
-  bool PostprocessWithDecode(
-      FDTensor& infer_result, DetectionResult* result,
-      const std::map<std::string, std::array<float, 2>>& im_info,
-      float conf_threshold, float nms_iou_threshold);
-
-  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
-  bool IsDynamicInput() const { return is_dynamic_input_; }
-
-  // whether to inference with dynamic shape (e.g ONNX export with dynamic shape
-  // or not.)
-  // megvii/YOLOX official 'export_onnx.py' script will export static ONNX by
-  // default.
-  // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This
-  // value will
-  // auto check by fastdeploy after the internal Runtime already initialized.
-  bool is_dynamic_input_;
-};
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc
deleted file mode 100644
index 68cb6a426..000000000
--- a/csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindYOLOX(pybind11::module& m) {
-  pybind11::class_<vision::detection::YOLOX, FastDeployModel>(m, "YOLOX")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::detection::YOLOX& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::detection::YOLOX::size)
-      .def_readwrite("padding_value", &vision::detection::YOLOX::padding_value)
-      .def_readwrite("is_decode_exported",
-                     &vision::detection::YOLOX::is_decode_exported)
-      .def_readwrite("downsample_strides",
-                     &vision::detection::YOLOX::downsample_strides)
-      .def_readwrite("max_wh", &vision::detection::YOLOX::max_wh);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/detection_pybind.cc b/csrcs/fastdeploy/vision/detection/detection_pybind.cc
deleted file mode 100644
index a865dc11e..000000000
--- a/csrcs/fastdeploy/vision/detection/detection_pybind.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-
-void BindYOLOv7(pybind11::module& m);
-void BindScaledYOLOv4(pybind11::module& m);
-void BindYOLOR(pybind11::module& m);
-void BindYOLOv6(pybind11::module& m);
-void BindYOLOv5Lite(pybind11::module& m);
-void BindYOLOv5(pybind11::module& m);
-void BindYOLOX(pybind11::module& m);
-void BindNanoDetPlus(pybind11::module& m);
-void BindPPDet(pybind11::module& m);
-
-void BindDetection(pybind11::module& m) {
-  auto detection_module =
-      m.def_submodule("detection", "Image object detection models.");
-  BindPPDet(detection_module);
-  BindYOLOv7(detection_module);
-  BindScaledYOLOv4(detection_module);
-  BindYOLOR(detection_module);
-  BindYOLOv6(detection_module);
-  BindYOLOv5Lite(detection_module);
-  BindYOLOv5(detection_module);
-  BindYOLOX(detection_module);
-  BindNanoDetPlus(detection_module);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/model.h b/csrcs/fastdeploy/vision/detection/ppdet/model.h
deleted file mode 100644
index f40c6b7fe..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/model.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/vision/detection/ppdet/picodet.h"
-#include "fastdeploy/vision/detection/ppdet/ppyolo.h"
-#include "fastdeploy/vision/detection/ppdet/ppyoloe.h"
-#include "fastdeploy/vision/detection/ppdet/rcnn.h"
-#include "fastdeploy/vision/detection/ppdet/yolov3.h"
-#include "fastdeploy/vision/detection/ppdet/yolox.h"
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/picodet.cc b/csrcs/fastdeploy/vision/detection/ppdet/picodet.cc
deleted file mode 100644
index d89fab2ae..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/picodet.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/ppdet/picodet.h"
-#include "yaml-cpp/yaml.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-PicoDet::PicoDet(const std::string& model_file, const std::string& params_file,
-                 const std::string& config_file,
-                 const RuntimeOption& custom_option,
-                 const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-  valid_gpu_backends = {Backend::PDINFER, Backend::ORT};
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  background_label = -1;
-  keep_top_k = 100;
-  nms_eta = 1;
-  nms_threshold = 0.6;
-  nms_top_k = 1000;
-  normalized = true;
-  score_threshold = 0.025;
-  CheckIfContainDecodeAndNMS();
-  initialized = Initialize();
-}
-
-bool PicoDet::CheckIfContainDecodeAndNMS() {
-  YAML::Node cfg;
-  try {
-    cfg = YAML::LoadFile(config_file_);
-  } catch (YAML::BadFile& e) {
-    FDERROR << "Failed to load yaml file " << config_file_
-            << ", maybe you should check this file." << std::endl;
-    return false;
-  }
-
-  if (cfg["arch"].as<std::string>() == "PicoDet") {
-    FDERROR << "The arch in config file is PicoDet, which means this model "
-               "doesn contain box decode and nms, please export model with "
-               "decode and nms."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/picodet.h b/csrcs/fastdeploy/vision/detection/ppdet/picodet.h
deleted file mode 100644
index 984e56222..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/picodet.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/vision/detection/ppdet/ppyoloe.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL PicoDet : public PPYOLOE {
- public:
-  PicoDet(const std::string& model_file, const std::string& params_file,
-          const std::string& config_file,
-          const RuntimeOption& custom_option = RuntimeOption(),
-          const Frontend& model_format = Frontend::PADDLE);
-
-  // Only support picodet contains decode and nms
-  bool CheckIfContainDecodeAndNMS();
-
-  virtual std::string ModelName() const { return "PicoDet"; }
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc b/csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc
deleted file mode 100644
index 2f4b0fefc..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindPPDet(pybind11::module& m) {
-  pybind11::class_<vision::detection::PPYOLOE, FastDeployModel>(m,
-                                                            "PPYOLOE")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict", [](vision::detection::PPYOLOE& self, pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        vision::DetectionResult res;
-        self.Predict(&mat, &res);
-        return res;
-      });
-
-  pybind11::class_<vision::detection::PPYOLO, FastDeployModel>(m,
-                                                           "PPYOLO")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict", [](vision::detection::PPYOLO& self, pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        vision::DetectionResult res;
-        self.Predict(&mat, &res);
-        return res;
-      });
-
-  pybind11::class_<vision::detection::PPYOLOv2, FastDeployModel>(m,
-                                                           "PPYOLOv2")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict", [](vision::detection::PPYOLOv2& self, pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        vision::DetectionResult res;
-        self.Predict(&mat, &res);
-        return res;
-      });
-
-  pybind11::class_<vision::detection::PicoDet, FastDeployModel>(m,
-                                                            "PicoDet")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict", [](vision::detection::PicoDet& self, pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        vision::DetectionResult res;
-        self.Predict(&mat, &res);
-        return res;
-      });
-
-  pybind11::class_<vision::detection::PaddleYOLOX, FastDeployModel>(m, "PaddleYOLOX")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict", [](vision::detection::PaddleYOLOX& self, pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        vision::DetectionResult res;
-        self.Predict(&mat, &res);
-        return res;
-      });
-
-  pybind11::class_<vision::detection::FasterRCNN, FastDeployModel>(m,
-                                                               "FasterRCNN")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict",
-           [](vision::detection::FasterRCNN& self, pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::DetectionResult res;
-             self.Predict(&mat, &res);
-             return res;
-           });
-
-  pybind11::class_<vision::detection::YOLOv3, FastDeployModel>(m,
-                                                           "YOLOv3")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict", [](vision::detection::YOLOv3& self, pybind11::array& data) {
-        auto mat = PyArrayToCvMat(data);
-        vision::DetectionResult res;
-        self.Predict(&mat, &res);
-        return res;
-      });
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc b/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc
deleted file mode 100644
index 6c202f0d0..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/ppdet/ppyolo.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-PPYOLO::PPYOLO(const std::string& model_file, const std::string& params_file,
-               const std::string& config_file,
-               const RuntimeOption& custom_option,
-               const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER};
-  valid_gpu_backends = {Backend::PDINFER};
-  has_nms_ = true;
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool PPYOLO::Initialize() {
-  if (!BuildPreprocessPipelineFromConfig()) {
-    FDERROR << "Failed to build preprocess pipeline from configuration file."
-            << std::endl;
-    return false;
-  }
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  return true;
-}
-
-bool PPYOLO::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  for (size_t i = 0; i < processors_.size(); ++i) {
-    if (!(*(processors_[i].get()))(mat)) {
-      FDERROR << "Failed to process image data in " << processors_[i]->Name()
-              << "." << std::endl;
-      return false;
-    }
-  }
-
-  outputs->resize(3);
-  (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, "im_shape");
-  (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, "scale_factor");
-  float* ptr0 = static_cast<float*>((*outputs)[0].MutableData());
-  ptr0[0] = mat->Height();
-  ptr0[1] = mat->Width();
-  float* ptr2 = static_cast<float*>((*outputs)[2].MutableData());
-  ptr2[0] = mat->Height() * 1.0 / origin_h;
-  ptr2[1] = mat->Width() * 1.0 / origin_w;
-  (*outputs)[1].name = "image";
-  mat->ShareWithTensor(&((*outputs)[1]));
-  // reshape to [1, c, h, w]
-  (*outputs)[1].shape.insert((*outputs)[1].shape.begin(), 1);
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h b/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h
deleted file mode 100644
index 1b3b48780..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/vision/detection/ppdet/ppyoloe.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL PPYOLO : public PPYOLOE {
- public:
-  PPYOLO(const std::string& model_file, const std::string& params_file,
-         const std::string& config_file,
-         const RuntimeOption& custom_option = RuntimeOption(),
-         const Frontend& model_format = Frontend::PADDLE);
-
-  virtual std::string ModelName() const { return "PaddleDetection/PPYOLO"; }
-
-  virtual bool Preprocess(Mat* mat, std::vector<FDTensor>* outputs);
-  virtual bool Initialize();
-
- protected:
-  PPYOLO() {}
-};
-
-class FASTDEPLOY_DECL PPYOLOv2 : public PPYOLO {
-  public:
-  PPYOLOv2(const std::string& model_file, const std::string& params_file,
-         const std::string& config_file,
-         const RuntimeOption& custom_option = RuntimeOption(),
-         const Frontend& model_format = Frontend::PADDLE) : PPYOLO(model_file, params_file, config_file, custom_option, model_format) {
-  }
-
-  virtual std::string ModelName() const { return "PaddleDetection/PPYOLOv2"; }
-};
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc b/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc
deleted file mode 100644
index 2e4b56ecb..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc
+++ /dev/null
@@ -1,258 +0,0 @@
-#include "fastdeploy/vision/detection/ppdet/ppyoloe.h"
-#include "fastdeploy/vision/utils/utils.h"
-#include "yaml-cpp/yaml.h"
-#ifdef ENABLE_PADDLE_FRONTEND
-#include "paddle2onnx/converter.h"
-#endif
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-PPYOLOE::PPYOLOE(const std::string& model_file, const std::string& params_file,
-                 const std::string& config_file,
-                 const RuntimeOption& custom_option,
-                 const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-  valid_gpu_backends = {Backend::PDINFER, Backend::ORT};
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-void PPYOLOE::GetNmsInfo() {
-  if (runtime_option.model_format == Frontend::PADDLE) {
-    std::string contents;
-    if (!ReadBinaryFromFile(runtime_option.model_file, &contents)) {
-      return;
-    }
-    auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
-    if (reader.has_nms) {
-      has_nms_ = true;
-      background_label = reader.nms_params.background_label;
-      keep_top_k = reader.nms_params.keep_top_k;
-      nms_eta = reader.nms_params.nms_eta;
-      nms_threshold = reader.nms_params.nms_threshold;
-      score_threshold = reader.nms_params.score_threshold;
-      nms_top_k = reader.nms_params.nms_top_k;
-      normalized = reader.nms_params.normalized;
-    }
-  }
-}
-
-bool PPYOLOE::Initialize() {
-#ifdef ENABLE_PADDLE_FRONTEND
-  // remove multiclass_nms3 now
-  // this is a trick operation for ppyoloe while inference on trt
-  GetNmsInfo();
-  runtime_option.remove_multiclass_nms_ = true;
-  runtime_option.custom_op_info_["multiclass_nms3"] = "MultiClassNMS";
-#endif
-  if (!BuildPreprocessPipelineFromConfig()) {
-    FDERROR << "Failed to build preprocess pipeline from configuration file."
-            << std::endl;
-    return false;
-  }
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-
-  if (has_nms_ && runtime_option.backend == Backend::TRT) {
-    FDINFO << "Detected operator multiclass_nms3 in your model, will replace "
-              "it with fastdeploy::backend::MultiClassNMS(background_label="
-           << background_label << ", keep_top_k=" << keep_top_k
-           << ", nms_eta=" << nms_eta << ", nms_threshold=" << nms_threshold
-           << ", score_threshold=" << score_threshold
-           << ", nms_top_k=" << nms_top_k << ", normalized=" << normalized
-           << ")." << std::endl;
-    has_nms_ = false;
-  }
-  return true;
-}
-
-bool PPYOLOE::BuildPreprocessPipelineFromConfig() {
-  processors_.clear();
-  YAML::Node cfg;
-  try {
-    cfg = YAML::LoadFile(config_file_);
-  } catch (YAML::BadFile& e) {
-    FDERROR << "Failed to load yaml file " << config_file_
-            << ", maybe you should check this file." << std::endl;
-    return false;
-  }
-
-  processors_.push_back(std::make_shared<BGR2RGB>());
-
-  for (const auto& op : cfg["Preprocess"]) {
-    std::string op_name = op["type"].as<std::string>();
-    if (op_name == "NormalizeImage") {
-      auto mean = op["mean"].as<std::vector<float>>();
-      auto std = op["std"].as<std::vector<float>>();
-      bool is_scale = op["is_scale"].as<bool>();
-      processors_.push_back(std::make_shared<Normalize>(mean, std, is_scale));
-    } else if (op_name == "Resize") {
-      bool keep_ratio = op["keep_ratio"].as<bool>();
-      auto target_size = op["target_size"].as<std::vector<int>>();
-      int interp = op["interp"].as<int>();
-      FDASSERT(target_size.size(),
-               "Require size of target_size be 2, but now it's " +
-                   std::to_string(target_size.size()) + ".");
-      if (!keep_ratio) {
-        int width = target_size[1];
-        int height = target_size[0];
-        processors_.push_back(
-            std::make_shared<Resize>(width, height, -1.0, -1.0, interp, false));
-      } else {
-        int min_target_size = std::min(target_size[0], target_size[1]);
-        int max_target_size = std::max(target_size[0], target_size[1]);
-        processors_.push_back(std::make_shared<ResizeByShort>(
-            min_target_size, interp, true, max_target_size));
-      }
-    } else if (op_name == "Permute") {
-      // Do nothing, do permute as the last operation
-      continue;
-      // processors_.push_back(std::make_shared<HWC2CHW>());
-    } else if (op_name == "Pad") {
-      auto size = op["size"].as<std::vector<int>>();
-      auto value = op["fill_value"].as<std::vector<float>>();
-      processors_.push_back(std::make_shared<Cast>("float"));
-      processors_.push_back(
-          std::make_shared<PadToSize>(size[1], size[0], value));
-    } else if (op_name == "PadStride") {
-      auto stride = op["stride"].as<int>();
-      processors_.push_back(
-          std::make_shared<StridePad>(stride, std::vector<float>(3, 0)));
-    } else {
-      FDERROR << "Unexcepted preprocess operator: " << op_name << "."
-              << std::endl;
-      return false;
-    }
-  }
-  processors_.push_back(std::make_shared<HWC2CHW>());
-  return true;
-}
-
-bool PPYOLOE::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  for (size_t i = 0; i < processors_.size(); ++i) {
-    if (!(*(processors_[i].get()))(mat)) {
-      FDERROR << "Failed to process image data in " << processors_[i]->Name()
-              << "." << std::endl;
-      return false;
-    }
-  }
-
-  outputs->resize(2);
-  (*outputs)[0].name = InputInfoOfRuntime(0).name;
-  mat->ShareWithTensor(&((*outputs)[0]));
-
-  // reshape to [1, c, h, w]
-  (*outputs)[0].shape.insert((*outputs)[0].shape.begin(), 1);
-
-  (*outputs)[1].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(1).name);
-  float* ptr = static_cast<float*>((*outputs)[1].MutableData());
-  ptr[0] = mat->Height() * 1.0 / origin_h;
-  ptr[1] = mat->Width() * 1.0 / origin_w;
-  return true;
-}
-
-bool PPYOLOE::Postprocess(std::vector<FDTensor>& infer_result,
-                          DetectionResult* result) {
-  FDASSERT(infer_result[1].shape[0] == 1,
-           "Only support batch = 1 in FastDeploy now.");
-
-  if (!has_nms_) {
-    int boxes_index = 0;
-    int scores_index = 1;
-    if (infer_result[0].shape[1] == infer_result[1].shape[2]) {
-      boxes_index = 0;
-      scores_index = 1;
-    } else if (infer_result[0].shape[2] == infer_result[1].shape[1]) {
-      boxes_index = 1;
-      scores_index = 0;
-    } else {
-      FDERROR << "The shape of boxes and scores should be [batch, boxes_num, "
-                 "4], [batch, classes_num, boxes_num]"
-              << std::endl;
-      return false;
-    }
-
-    backend::MultiClassNMS nms;
-    nms.background_label = background_label;
-    nms.keep_top_k = keep_top_k;
-    nms.nms_eta = nms_eta;
-    nms.nms_threshold = nms_threshold;
-    nms.score_threshold = score_threshold;
-    nms.nms_top_k = nms_top_k;
-    nms.normalized = normalized;
-    nms.Compute(static_cast<float*>(infer_result[boxes_index].Data()),
-                static_cast<float*>(infer_result[scores_index].Data()),
-                infer_result[boxes_index].shape,
-                infer_result[scores_index].shape);
-    if (nms.out_num_rois_data[0] > 0) {
-      result->Reserve(nms.out_num_rois_data[0]);
-    }
-    for (size_t i = 0; i < nms.out_num_rois_data[0]; ++i) {
-      result->label_ids.push_back(nms.out_box_data[i * 6]);
-      result->scores.push_back(nms.out_box_data[i * 6 + 1]);
-      result->boxes.emplace_back(std::array<float, 4>{
-          nms.out_box_data[i * 6 + 2], nms.out_box_data[i * 6 + 3],
-          nms.out_box_data[i * 6 + 4], nms.out_box_data[i * 6 + 5]});
-    }
-  } else {
-    int box_num = 0;
-    if (infer_result[1].dtype == FDDataType::INT32) {
-      box_num = *(static_cast<int32_t*>(infer_result[1].Data()));
-    } else if (infer_result[1].dtype == FDDataType::INT64) {
-      box_num = *(static_cast<int64_t*>(infer_result[1].Data()));
-    } else {
-      FDASSERT(
-          false,
-          "The output box_num of PPYOLOE model should be type of int32/int64.");
-    }
-    result->Reserve(box_num);
-    float* box_data = static_cast<float*>(infer_result[0].Data());
-    for (size_t i = 0; i < box_num; ++i) {
-      result->label_ids.push_back(box_data[i * 6]);
-      result->scores.push_back(box_data[i * 6 + 1]);
-      result->boxes.emplace_back(
-          std::array<float, 4>{box_data[i * 6 + 2], box_data[i * 6 + 3],
-                               box_data[i * 6 + 4], box_data[i * 6 + 5]});
-    }
-  }
-  return true;
-}
-
-bool PPYOLOE::Predict(cv::Mat* im, DetectionResult* result) {
-  Mat mat(*im);
-  std::vector<FDTensor> processed_data;
-  if (!Preprocess(&mat, &processed_data)) {
-    FDERROR << "Failed to preprocess input data while using model:"
-            << ModelName() << "." << std::endl;
-    return false;
-  }
-
-  float* tmp = static_cast<float*>(processed_data[1].Data());
-  std::vector<FDTensor> infer_result;
-  if (!Infer(processed_data, &infer_result)) {
-    FDERROR << "Failed to inference while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-
-  if (!Postprocess(infer_result, result)) {
-    FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h b/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h
deleted file mode 100644
index 2d8cca99f..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL PPYOLOE : public FastDeployModel {
- public:
-  PPYOLOE(const std::string& model_file, const std::string& params_file,
-          const std::string& config_file,
-          const RuntimeOption& custom_option = RuntimeOption(),
-          const Frontend& model_format = Frontend::PADDLE);
-
-  virtual std::string ModelName() const { return "PaddleDetection/PPYOLOE"; }
-
-  virtual bool Initialize();
-
-  virtual bool BuildPreprocessPipelineFromConfig();
-
-  virtual bool Preprocess(Mat* mat, std::vector<FDTensor>* outputs);
-
-  virtual bool Postprocess(std::vector<FDTensor>& infer_result,
-                           DetectionResult* result);
-
-  virtual bool Predict(cv::Mat* im, DetectionResult* result);
-
- protected:
-  PPYOLOE() {}
-
-  std::vector<std::shared_ptr<Processor>> processors_;
-  std::string config_file_;
-  // configuration for nms
-  int64_t background_label = -1;
-  int64_t keep_top_k = 300;
-  float nms_eta = 1.0;
-  float nms_threshold = 0.7;
-  float score_threshold = 0.01;
-  int64_t nms_top_k = 10000;
-  bool normalized = true;
-  bool has_nms_ = false;
-
-  // This function will used to check if this model contains multiclass_nms
-  // and get parameters from the operator
-  void GetNmsInfo();
-};
-
-}  // namespace detection 
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc b/csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc
deleted file mode 100644
index 38ecc3d1c..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/ppdet/rcnn.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-FasterRCNN::FasterRCNN(const std::string& model_file,
-                       const std::string& params_file,
-                       const std::string& config_file,
-                       const RuntimeOption& custom_option,
-                       const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER};
-  valid_gpu_backends = {Backend::PDINFER};
-  has_nms_ = true;
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool FasterRCNN::Initialize() {
-  if (!BuildPreprocessPipelineFromConfig()) {
-    FDERROR << "Failed to build preprocess pipeline from configuration file."
-            << std::endl;
-    return false;
-  }
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  return true;
-}
-
-bool FasterRCNN::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  float scale[2] = {1.0, 1.0};
-  for (size_t i = 0; i < processors_.size(); ++i) {
-    if (!(*(processors_[i].get()))(mat)) {
-      FDERROR << "Failed to process image data in " << processors_[i]->Name()
-              << "." << std::endl;
-      return false;
-    }
-    if (processors_[i]->Name().find("Resize") != std::string::npos) {
-      scale[0] = mat->Height() * 1.0 / origin_h;
-      scale[1] = mat->Width() * 1.0 / origin_w;
-    }
-  }
-
-  outputs->resize(3);
-  (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, "im_shape");
-  (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, "scale_factor");
-  float* ptr0 = static_cast<float*>((*outputs)[0].MutableData());
-  ptr0[0] = mat->Height();
-  ptr0[1] = mat->Width();
-  float* ptr2 = static_cast<float*>((*outputs)[2].MutableData());
-  ptr2[0] = scale[0];
-  ptr2[1] = scale[1];
-  (*outputs)[1].name = "image";
-  mat->ShareWithTensor(&((*outputs)[1]));
-  // reshape to [1, c, h, w]
-  (*outputs)[1].shape.insert((*outputs)[1].shape.begin(), 1);
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.h b/csrcs/fastdeploy/vision/detection/ppdet/rcnn.h
deleted file mode 100644
index d44ca852e..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.h
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/vision/detection/ppdet/ppyoloe.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL FasterRCNN : public PPYOLOE {
- public:
-  FasterRCNN(const std::string& model_file, const std::string& params_file,
-             const std::string& config_file,
-             const RuntimeOption& custom_option = RuntimeOption(),
-             const Frontend& model_format = Frontend::PADDLE);
-
-  virtual std::string ModelName() const { return "PaddleDetection/FasterRCNN"; }
-
-  virtual bool Preprocess(Mat* mat, std::vector<FDTensor>* outputs);
-  virtual bool Initialize();
-
- protected:
-  FasterRCNN() {}
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc b/csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc
deleted file mode 100644
index 309d65640..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/ppdet/yolov3.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-YOLOv3::YOLOv3(const std::string& model_file, const std::string& params_file,
-               const std::string& config_file,
-               const RuntimeOption& custom_option,
-               const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER};
-  valid_gpu_backends = {Backend::PDINFER};
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool YOLOv3::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  for (size_t i = 0; i < processors_.size(); ++i) {
-    if (!(*(processors_[i].get()))(mat)) {
-      FDERROR << "Failed to process image data in " << processors_[i]->Name()
-              << "." << std::endl;
-      return false;
-    }
-  }
-
-  outputs->resize(3);
-  (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, "im_shape");
-  (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, "scale_factor");
-  float* ptr0 = static_cast<float*>((*outputs)[0].MutableData());
-  ptr0[0] = mat->Height();
-  ptr0[1] = mat->Width();
-  float* ptr2 = static_cast<float*>((*outputs)[2].MutableData());
-  ptr2[0] = mat->Height() * 1.0 / origin_h;
-  ptr2[1] = mat->Width() * 1.0 / origin_w;
-  (*outputs)[1].name = "image";
-  mat->ShareWithTensor(&((*outputs)[1]));
-  // reshape to [1, c, h, w]
-  (*outputs)[1].shape.insert((*outputs)[1].shape.begin(), 1);
-  return true;
-}
-
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.h b/csrcs/fastdeploy/vision/detection/ppdet/yolov3.h
deleted file mode 100644
index 1b65bfca1..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/vision/detection/ppdet/ppyoloe.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL YOLOv3 : public PPYOLOE {
- public:
-  YOLOv3(const std::string& model_file, const std::string& params_file,
-         const std::string& config_file,
-         const RuntimeOption& custom_option = RuntimeOption(),
-         const Frontend& model_format = Frontend::PADDLE);
-
-  virtual std::string ModelName() const { return "PaddleDetection/YOLOv3"; }
-
-  virtual bool Preprocess(Mat* mat, std::vector<FDTensor>* outputs);
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolox.cc b/csrcs/fastdeploy/vision/detection/ppdet/yolox.cc
deleted file mode 100644
index a60ebfcc4..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/yolox.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/detection/ppdet/yolox.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-PaddleYOLOX::PaddleYOLOX(const std::string& model_file, const std::string& params_file,
-             const std::string& config_file, const RuntimeOption& custom_option,
-             const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-  valid_gpu_backends = {Backend::PDINFER, Backend::ORT};
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  background_label = -1;
-  keep_top_k = 1000;
-  nms_eta = 1;
-  nms_threshold = 0.65;
-  nms_top_k = 10000;
-  normalized = true;
-  score_threshold = 0.001;
-  initialized = Initialize();
-}
-
-bool PaddleYOLOX::Preprocess(Mat* mat, std::vector<FDTensor>* outputs) {
-  int origin_w = mat->Width();
-  int origin_h = mat->Height();
-  float scale[2] = {1.0, 1.0};
-  for (size_t i = 0; i < processors_.size(); ++i) {
-    if (!(*(processors_[i].get()))(mat)) {
-      FDERROR << "Failed to process image data in " << processors_[i]->Name()
-              << "." << std::endl;
-      return false;
-    }
-    if (processors_[i]->Name().find("Resize") != std::string::npos) {
-      scale[0] = mat->Height() * 1.0 / origin_h;
-      scale[1] = mat->Width() * 1.0 / origin_w;
-    }
-  }
-
-  outputs->resize(2);
-  (*outputs)[0].name = InputInfoOfRuntime(0).name;
-  mat->ShareWithTensor(&((*outputs)[0]));
-
-  // reshape to [1, c, h, w]
-  (*outputs)[0].shape.insert((*outputs)[0].shape.begin(), 1);
-
-  (*outputs)[1].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(1).name);
-  float* ptr = static_cast<float*>((*outputs)[1].MutableData());
-  ptr[0] = scale[0];
-  ptr[1] = scale[1];
-  return true;
-}
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolox.h b/csrcs/fastdeploy/vision/detection/ppdet/yolox.h
deleted file mode 100644
index 4ffe2f39c..000000000
--- a/csrcs/fastdeploy/vision/detection/ppdet/yolox.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/vision/detection/ppdet/ppyoloe.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace detection {
-
-class FASTDEPLOY_DECL PaddleYOLOX : public PPYOLOE {
- public:
-  PaddleYOLOX(const std::string& model_file, const std::string& params_file,
-        const std::string& config_file,
-        const RuntimeOption& custom_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::PADDLE);
-
-  virtual bool Preprocess(Mat* mat, std::vector<FDTensor>* outputs);
-
-  virtual std::string ModelName() const { return "PaddleDetection/YOLOX"; }
-};
-}  // namespace detection
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc b/csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc
deleted file mode 100644
index ebb52010e..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc
+++ /dev/null
@@ -1,310 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/facedet/contrib/retinaface.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-struct RetinaAnchor {
-  float cx;
-  float cy;
-  float s_kx;
-  float s_ky;
-};
-
-void GenerateRetinaAnchors(const std::vector<int>& size,
-                           const std::vector<int>& downsample_strides,
-                           const std::vector<std::vector<int>>& min_sizes,
-                           std::vector<RetinaAnchor>* anchors) {
-  // size: tuple of input (width, height)
-  // downsample_strides: downsample strides (steps), e.g (8,16,32)
-  // min_sizes: width and height for each anchor,
-  // e.g {{16, 32}, {64, 128}, {256, 512}}
-  int h = size[1];
-  int w = size[0];
-  std::vector<std::vector<int>> feature_maps;
-  for (auto s : downsample_strides) {
-    feature_maps.push_back(
-        {static_cast<int>(
-             std::ceil(static_cast<float>(h) / static_cast<float>(s))),
-         static_cast<int>(
-             std::ceil(static_cast<float>(w) / static_cast<float>(s)))});
-  }
-
-  (*anchors).clear();
-  const size_t num_feature_map = feature_maps.size();
-  // reference: layers/functions/prior_box.py#L21
-  for (size_t k = 0; k < num_feature_map; ++k) {
-    auto f_map = feature_maps.at(k);       // e.g [640//8,640//8]
-    auto tmp_min_sizes = min_sizes.at(k);  // e.g [8,16]
-    int f_h = f_map.at(0);
-    int f_w = f_map.at(1);
-    for (size_t i = 0; i < f_h; ++i) {
-      for (size_t j = 0; j < f_w; ++j) {
-        for (auto min_size : tmp_min_sizes) {
-          float s_kx =
-              static_cast<float>(min_size) / static_cast<float>(w);  // e.g 16/w
-          float s_ky =
-              static_cast<float>(min_size) / static_cast<float>(h);  // e.g 16/h
-          // (x + 0.5) * step / w normalized loc mapping to input width
-          // (y + 0.5) * step / h normalized loc mapping to input height
-          float s = static_cast<float>(downsample_strides.at(k));
-          float cx = (static_cast<float>(j) + 0.5f) * s / static_cast<float>(w);
-          float cy = (static_cast<float>(i) + 0.5f) * s / static_cast<float>(h);
-          (*anchors).emplace_back(
-              RetinaAnchor{cx, cy, s_kx, s_ky});  // without clip
-        }
-      }
-    }
-  }
-}
-
-RetinaFace::RetinaFace(const std::string& model_file,
-                       const std::string& params_file,
-                       const RuntimeOption& custom_option,
-                       const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool RetinaFace::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  variance = {0.1f, 0.2f};
-  downsample_strides = {8, 16, 32};
-  min_sizes = {{16, 32}, {64, 128}, {256, 512}};
-  landmarks_per_face = 5;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  return true;
-}
-
-bool RetinaFace::Preprocess(
-    Mat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
-  // retinaface's preprocess steps
-  // 1. Resize
-  // 2. Convert(opencv style) or Normalize
-  // 3. HWC->CHW
-  int resize_w = size[0];
-  int resize_h = size[1];
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-
-  // Compute `result = mat * alpha + beta` directly by channel
-  // Reference: detect.py#L94
-  std::vector<float> alpha = {1.f, 1.f, 1.f};
-  std::vector<float> beta = {-104.f, -117.f, -123.f};  // BGR;
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool RetinaFace::Postprocess(
-    std::vector<FDTensor>& infer_result, FaceDetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  // retinaface has 3 output tensors, boxes & conf & landmarks
-  FDASSERT(
-      (infer_result.size() == 3),
-      "The default number of output tensor must be 3 according to retinaface.");
-  FDTensor& boxes_tensor = infer_result.at(0);      // (1,n,4)
-  FDTensor& conf_tensor = infer_result.at(1);       // (1,n,2)
-  FDTensor& landmarks_tensor = infer_result.at(2);  // (1,n,10)
-  FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now.");
-  if (boxes_tensor.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-
-  result->Clear();
-  // must be setup landmarks_per_face before reserve
-  result->landmarks_per_face = landmarks_per_face;
-  result->Reserve(boxes_tensor.shape[1]);
-
-  float* boxes_ptr = static_cast<float*>(boxes_tensor.Data());
-  float* conf_ptr = static_cast<float*>(conf_tensor.Data());
-  float* landmarks_ptr = static_cast<float*>(landmarks_tensor.Data());
-  const size_t num_bboxes = boxes_tensor.shape[1];  // n
-  // fetch original image shape
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT((iter_ipt != im_info.end()),
-           "Cannot find input_shape from im_info.");
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-
-  // generate anchors with dowmsample strides
-  std::vector<RetinaAnchor> anchors;
-  GenerateRetinaAnchors(size, downsample_strides, min_sizes, &anchors);
-
-  // decode bounding boxes
-  for (size_t i = 0; i < num_bboxes; ++i) {
-    float confidence = conf_ptr[2 * i + 1];
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    float prior_cx = anchors.at(i).cx;
-    float prior_cy = anchors.at(i).cy;
-    float prior_s_kx = anchors.at(i).s_kx;
-    float prior_s_ky = anchors.at(i).s_ky;
-
-    // fetch offsets (dx,dy,dw,dh)
-    float dx = boxes_ptr[4 * i + 0];
-    float dy = boxes_ptr[4 * i + 1];
-    float dw = boxes_ptr[4 * i + 2];
-    float dh = boxes_ptr[4 * i + 3];
-    // reference: Pytorch_Retinaface/utils/box_utils.py
-    float x = prior_cx + dx * variance[0] * prior_s_kx;
-    float y = prior_cy + dy * variance[0] * prior_s_ky;
-    float w = prior_s_kx * std::exp(dw * variance[1]);
-    float h = prior_s_ky * std::exp(dh * variance[1]);  // (0.~1.)
-    // from (x,y,w,h) to (x1,y1,x2,y2)
-    float x1 = (x - w / 2.f) * ipt_w;
-    float y1 = (y - h / 2.f) * ipt_h;
-    float x2 = (x + w / 2.f) * ipt_w;
-    float y2 = (y + h / 2.f) * ipt_h;
-    result->boxes.emplace_back(std::array<float, 4>{x1, y1, x2, y2});
-    result->scores.push_back(confidence);
-    // decode landmarks (default 5 landmarks)
-    if (landmarks_per_face > 0) {
-      // reference: utils/box_utils.py#L241
-      for (size_t j = 0; j < landmarks_per_face * 2; j += 2) {
-        float ldx = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 0)];
-        float ldy = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 1)];
-        float lx = (prior_cx + ldx * variance[0] * prior_s_kx) * ipt_w;
-        float ly = (prior_cy + ldy * variance[0] * prior_s_ky) * ipt_h;
-        result->landmarks.emplace_back(std::array<float, 2>{lx, ly});
-      }
-    }
-  }
-
-  if (result->boxes.size() == 0) {
-    return true;
-  }
-
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale and clip box
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f);
-    result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f);
-    result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f);
-    result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  // scale and clip landmarks
-  for (size_t i = 0; i < result->landmarks.size(); ++i) {
-    result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f);
-    result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f);
-    result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f);
-    result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool RetinaFace::Predict(cv::Mat* im, FaceDetectionResult* result,
-                         float conf_threshold, float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors, result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.h b/csrcs/fastdeploy/vision/facedet/contrib/retinaface.h
deleted file mode 100644
index e1ef50e2e..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-class FASTDEPLOY_DECL RetinaFace : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  RetinaFace(const std::string& model_file, const std::string& params_file = "",
-             const RuntimeOption& custom_option = RuntimeOption(),
-             const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "Pytorch_Retinaface"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
-                       float conf_threshold = 0.25f,
-                       float nms_iou_threshold = 0.4f);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height), default (640, 640)
-  std::vector<int> size;
-  // variance in RetinaFace's prior-box(anchor) generate process,
-  // default (0.1, 0.2)
-  std::vector<float> variance;
-  // downsample strides (namely, steps) for RetinaFace to
-  // generate anchors, will take (8,16,32) as default values.
-  std::vector<int> downsample_strides;
-  // min sizes, width and height for each anchor.
-  std::vector<std::vector<int>> min_sizes;
-  // landmarks_per_face, default 5 in RetinaFace
-  int landmarks_per_face;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(std::vector<FDTensor>& infer_result,
-                   FaceDetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
-  bool IsDynamicInput() const { return is_dynamic_input_; }
-
-  bool is_dynamic_input_;
-};
-
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc
deleted file mode 100644
index 9419327c4..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindRetinaFace(pybind11::module& m) {
-  pybind11::class_<vision::facedet::RetinaFace, FastDeployModel>(m,
-                                                                 "RetinaFace")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::facedet::RetinaFace& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceDetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::facedet::RetinaFace::size)
-      .def_readwrite("variance", &vision::facedet::RetinaFace::variance)
-      .def_readwrite("downsample_strides",
-                     &vision::facedet::RetinaFace::downsample_strides)
-      .def_readwrite("min_sizes", &vision::facedet::RetinaFace::min_sizes)
-      .def_readwrite("landmarks_per_face",
-                     &vision::facedet::RetinaFace::landmarks_per_face);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc b/csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc
deleted file mode 100644
index ffcff65c9..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc
+++ /dev/null
@@ -1,369 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/facedet/contrib/scrfd.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-void SCRFD::LetterBox(Mat* mat, const std::vector<int>& size,
-                      const std::vector<float>& color, bool _auto,
-                      bool scale_fill, bool scale_up, int stride) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-SCRFD::SCRFD(const std::string& model_file, const std::string& params_file,
-             const RuntimeOption& custom_option, const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool SCRFD::Initialize() {
-  // parameters for preprocess
-  use_kps = true;
-  size = {640, 640};
-  padding_value = {0.0, 0.0, 0.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  downsample_strides = {8, 16, 32};
-  num_anchors = 2;
-  landmarks_per_face = 5;
-  center_points_is_update_ = false;
-  max_nms = 30000;
-  // num_outputs = use_kps ? 9 : 6;
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_shape is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-
-  return true;
-}
-
-bool SCRFD::Preprocess(Mat* mat, FDTensor* output,
-                       std::map<std::string, std::array<float, 2>>* im_info) {
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(mat->Height() * ratio);
-    int resize_w = int(mat->Width() * ratio);
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // scrfd's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  SCRFD::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad,
-                   is_scale_up, stride);
-
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  // Original Repo/tools/scrfd.py: cv2.dnn.blobFromImage(img, 1.0/128,
-  // input_size, (127.5, 127.5, 127.5), swapRB=True)
-  std::vector<float> alpha = {1.f / 128.f, 1.f / 128.f, 1.f / 128.f};
-  std::vector<float> beta = {-127.5f / 128.f, -127.5f / 128.f, -127.5f / 128.f};
-  Convert::Run(mat, alpha, beta);
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-void SCRFD::GeneratePoints() {
-  if (center_points_is_update_ && !is_dynamic_input_) {
-    return;
-  }
-  // 8, 16, 32
-  for (auto local_stride : downsample_strides) {
-    unsigned int num_grid_w = size[0] / local_stride;
-    unsigned int num_grid_h = size[1] / local_stride;
-    // y
-    for (unsigned int i = 0; i < num_grid_h; ++i) {
-      // x
-      for (unsigned int j = 0; j < num_grid_w; ++j) {
-        // num_anchors, col major
-        for (unsigned int k = 0; k < num_anchors; ++k) {
-          SCRFDPoint point;
-          point.cx = static_cast<float>(j);
-          point.cy = static_cast<float>(i);
-          center_points_[local_stride].push_back(point);
-        }
-      }
-    }
-  }
-
-  center_points_is_update_ = true;
-}
-
-bool SCRFD::Postprocess(
-    std::vector<FDTensor>& infer_result, FaceDetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  // number of downsample_strides
-  int fmc = downsample_strides.size();
-  // scrfd has 6,9,10,15 output tensors
-  FDASSERT((infer_result.size() == 9 || infer_result.size() == 6 ||
-            infer_result.size() == 10 || infer_result.size() == 15),
-           "The default number of output tensor must be 6, 9, 10, or 15 "
-           "according to scrfd.");
-  FDASSERT((fmc == 3 || fmc == 5), "The fmc must be 3 or 5");
-  FDASSERT((infer_result.at(0).shape[0] == 1), "Only support batch =1 now.");
-  for (int i = 0; i < fmc; ++i) {
-    if (infer_result.at(i).dtype != FDDataType::FP32) {
-      FDERROR << "Only support post process with float32 data." << std::endl;
-      return false;
-    }
-  }
-  int total_num_boxes = 0;
-  // compute the reserve space.
-  for (int f = 0; f < fmc; ++f) {
-    total_num_boxes += infer_result.at(f).shape[1];
-  };
-  GeneratePoints();
-  result->Clear();
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * scale) / 2.0f;
-  float pad_w = (out_w - ipt_w * scale) / 2.0f;
-  if (is_mini_pad) {
-    // 和 LetterBox中_auto=true的处理逻辑对应
-    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
-    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
-  }
-  // must be setup landmarks_per_face before reserve
-  result->landmarks_per_face = landmarks_per_face;
-  result->Reserve(total_num_boxes);
-  unsigned int count = 0;
-  // loop each stride
-  for (int f = 0; f < fmc; ++f) {
-    float* score_ptr = static_cast<float*>(infer_result.at(f).Data());
-    float* bbox_ptr = static_cast<float*>(infer_result.at(f + fmc).Data());
-    const unsigned int num_points = infer_result.at(f).shape[1];
-    int current_stride = downsample_strides[f];
-    auto& stride_points = center_points_[current_stride];
-    // loop each anchor
-    for (unsigned int i = 0; i < num_points; ++i) {
-      const float cls_conf = score_ptr[i];
-      if (cls_conf < conf_threshold) continue;  // filter
-      auto& point = stride_points.at(i);
-      const float cx = point.cx;  // cx
-      const float cy = point.cy;  // cy
-      // bbox
-      const float* offsets = bbox_ptr + i * 4;
-      float l = offsets[0];  // left
-      float t = offsets[1];  // top
-      float r = offsets[2];  // right
-      float b = offsets[3];  // bottom
-
-      float x1 = ((cx - l) * static_cast<float>(current_stride) -
-                  static_cast<float>(pad_w)) /
-                 scale;  // cx - l x1
-      float y1 = ((cy - t) * static_cast<float>(current_stride) -
-                  static_cast<float>(pad_h)) /
-                 scale;  // cy - t y1
-      float x2 = ((cx + r) * static_cast<float>(current_stride) -
-                  static_cast<float>(pad_w)) /
-                 scale;  // cx + r x2
-      float y2 = ((cy + b) * static_cast<float>(current_stride) -
-                  static_cast<float>(pad_h)) /
-                 scale;  // cy + b y2
-      result->boxes.emplace_back(std::array<float, 4>{x1, y1, x2, y2});
-      result->scores.push_back(cls_conf);
-      if (use_kps) {
-        float* landmarks_ptr =
-            static_cast<float*>(infer_result.at(f + 2 * fmc).Data());
-        // landmarks
-        const float* kps_offsets = landmarks_ptr + i * (landmarks_per_face * 2);
-        for (unsigned int j = 0; j < landmarks_per_face * 2; j += 2) {
-          float kps_l = kps_offsets[j];
-          float kps_t = kps_offsets[j + 1];
-          float kps_x = ((cx + kps_l) * static_cast<float>(current_stride) -
-                         static_cast<float>(pad_w)) /
-                        scale;  // cx + l x
-          float kps_y = ((cy + kps_t) * static_cast<float>(current_stride) -
-                         static_cast<float>(pad_h)) /
-                        scale;  // cy + t y
-          result->landmarks.emplace_back(std::array<float, 2>{kps_x, kps_y});
-        }
-      }
-      count += 1;  // limit boxes for nms.
-      if (count > max_nms) {
-        break;
-      }
-    }
-  }
-
-  // fetch original image shape
-  FDASSERT((iter_ipt != im_info.end()),
-           "Cannot find input_shape from im_info.");
-
-  if (result->boxes.size() == 0) {
-    return true;
-  }
-
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale and clip box
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f);
-    result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f);
-    result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f);
-    result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  // scale and clip landmarks
-  for (size_t i = 0; i < result->landmarks.size(); ++i) {
-    result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f);
-    result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f);
-    result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f);
-    result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool SCRFD::Predict(cv::Mat* im, FaceDetectionResult* result,
-                    float conf_threshold, float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors, result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.h b/csrcs/fastdeploy/vision/facedet/contrib/scrfd.h
deleted file mode 100644
index 398301363..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.h
+++ /dev/null
@@ -1,122 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <unordered_map>
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-class FASTDEPLOY_DECL SCRFD : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  SCRFD(const std::string& model_file, const std::string& params_file = "",
-        const RuntimeOption& custom_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "scrfd"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
-                       float conf_threshold = 0.25f,
-                       float nms_iou_threshold = 0.4f);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height), default (640, 640)
-  std::vector<int> size;
-  // downsample strides (namely, steps) for SCRFD to
-  // generate anchors, will take (8,16,32) as default values.
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // for offseting the boxes by classes when using NMS
-  std::vector<int> downsample_strides;
-  // landmarks_per_face, default 5 in SCRFD
-  int landmarks_per_face;
-  // are the outputs of onnx file with key points features or not
-  bool use_kps;
-  // the upperbond number of boxes processed by nms.
-  int max_nms;
-  // number anchors of each stride
-  unsigned int num_anchors;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(std::vector<FDTensor>& infer_result,
-                   FaceDetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  void GeneratePoints();
-
-  // 对图片进行LetterBox处理
-  // mat 为读取到的原图
-  // size 为输入模型的图像尺寸
-  void LetterBox(Mat* mat, const std::vector<int>& size,
-                 const std::vector<float>& color, bool _auto,
-                 bool scale_fill = false, bool scale_up = true,
-                 int stride = 32);
-
-  bool is_dynamic_input_;
-
-  bool center_points_is_update_;
-
-  typedef struct {
-    float cx;
-    float cy;
-  } SCRFDPoint;
-
-  std::unordered_map<int, std::vector<SCRFDPoint>> center_points_;
-};
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc
deleted file mode 100644
index 7cfa4d025..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindSCRFD(pybind11::module& m) {
-  // Bind SCRFD
-  pybind11::class_<vision::facedet::SCRFD, FastDeployModel>(m, "SCRFD")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::facedet::SCRFD& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceDetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::facedet::SCRFD::size)
-      .def_readwrite("padding_value", &vision::facedet::SCRFD::padding_value)
-      .def_readwrite("is_mini_pad", &vision::facedet::SCRFD::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::facedet::SCRFD::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::facedet::SCRFD::is_scale_up)
-      .def_readwrite("stride", &vision::facedet::SCRFD::stride)
-      .def_readwrite("use_kps", &vision::facedet::SCRFD::use_kps)
-      .def_readwrite("max_nms", &vision::facedet::SCRFD::max_nms)
-      .def_readwrite("downsample_strides",
-                     &vision::facedet::SCRFD::downsample_strides)
-      .def_readwrite("num_anchors", &vision::facedet::SCRFD::num_anchors)
-      .def_readwrite("landmarks_per_face",
-                     &vision::facedet::SCRFD::landmarks_per_face);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc b/csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc
deleted file mode 100644
index ed4962306..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc
+++ /dev/null
@@ -1,221 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/facedet/contrib/ultraface.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-UltraFace::UltraFace(const std::string& model_file,
-                     const std::string& params_file,
-                     const RuntimeOption& custom_option,
-                     const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool UltraFace::Initialize() {
-  // parameters for preprocess
-  size = {320, 240};
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  return true;
-}
-
-bool UltraFace::Preprocess(
-    Mat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
-  // ultraface's preprocess steps
-  // 1. resize
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  int resize_w = size[0];
-  int resize_h = size[1];
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-
-  BGR2RGB::Run(mat);
-  // Compute `result = mat * alpha + beta` directly by channel
-  // Reference: detect_imgs_onnx.py#L73
-  std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f};
-  std::vector<float> beta = {-127.0f * (1.0f / 128.0f),
-                             -127.0f * (1.0f / 128.0f),
-                             -127.0f * (1.0f / 128.0f)};  // RGB;
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool UltraFace::Postprocess(
-    std::vector<FDTensor>& infer_result, FaceDetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  // ultraface has 2 output tensors, scores & boxes
-  FDASSERT(
-      (infer_result.size() == 2),
-      "The default number of output tensor must be 2 according to ultraface.");
-  FDTensor& scores_tensor = infer_result.at(0);  // (1,4420,2)
-  FDTensor& boxes_tensor = infer_result.at(1);   // (1,4420,4)
-  FDASSERT((scores_tensor.shape[0] == 1), "Only support batch =1 now.");
-  FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now.");
-  if (scores_tensor.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-  if (boxes_tensor.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-
-  result->Clear();
-  // must be setup landmarks_per_face before reserve.
-  // ultraface detector does not detect landmarks by default.
-  result->landmarks_per_face = 0;
-  result->Reserve(boxes_tensor.shape[1]);
-
-  float* scores_ptr = static_cast<float*>(scores_tensor.Data());
-  float* boxes_ptr = static_cast<float*>(boxes_tensor.Data());
-  const size_t num_bboxes = boxes_tensor.shape[1];  // e.g 4420
-  // fetch original image shape
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT((iter_ipt != im_info.end()),
-           "Cannot find input_shape from im_info.");
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-
-  // decode bounding boxes
-  for (size_t i = 0; i < num_bboxes; ++i) {
-    float confidence = scores_ptr[2 * i + 1];
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    float x1 = boxes_ptr[4 * i + 0] * ipt_w;
-    float y1 = boxes_ptr[4 * i + 1] * ipt_h;
-    float x2 = boxes_ptr[4 * i + 2] * ipt_w;
-    float y2 = boxes_ptr[4 * i + 3] * ipt_h;
-    result->boxes.emplace_back(std::array<float, 4>{x1, y1, x2, y2});
-    result->scores.push_back(confidence);
-  }
-
-  if (result->boxes.size() == 0) {
-    return true;
-  }
-
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale and clip box
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f);
-    result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f);
-    result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f);
-    result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool UltraFace::Predict(cv::Mat* im, FaceDetectionResult* result,
-                        float conf_threshold, float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors, result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.h b/csrcs/fastdeploy/vision/facedet/contrib/ultraface.h
deleted file mode 100644
index 387bc1f9a..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-class FASTDEPLOY_DECL UltraFace : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  UltraFace(const std::string& model_file, const std::string& params_file = "",
-            const RuntimeOption& custom_option = RuntimeOption(),
-            const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const {
-    return "Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB";
-  }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
-                       float conf_threshold = 0.7f,
-                       float nms_iou_threshold = 0.3f);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height), default (320, 240)
-  std::vector<int> size;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* outputs,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(std::vector<FDTensor>& infer_result,
-                   FaceDetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
-  bool IsDynamicInput() const { return is_dynamic_input_; }
-
-  bool is_dynamic_input_;
-};
-
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc
deleted file mode 100644
index 855c26908..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindUltraFace(pybind11::module& m) {
-  pybind11::class_<vision::facedet::UltraFace, FastDeployModel>(m, "UltraFace")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::facedet::UltraFace& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceDetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::facedet::UltraFace::size);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc b/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc
deleted file mode 100644
index 96af230b0..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc
+++ /dev/null
@@ -1,294 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/facedet/contrib/yolov5face.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
-               bool _auto, bool scale_fill = false, bool scale_up = true,
-               int stride = 32) {
-  float scale =
-      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
-  if (!scale_up) {
-    scale = std::min(scale, 1.0f);
-  }
-
-  int resize_h = int(round(mat->Height() * scale));
-  int resize_w = int(round(mat->Width() * scale));
-
-  int pad_w = size[0] - resize_w;
-  int pad_h = size[1] - resize_h;
-  if (_auto) {
-    pad_h = pad_h % stride;
-    pad_w = pad_w % stride;
-  } else if (scale_fill) {
-    pad_h = 0;
-    pad_w = 0;
-    resize_h = size[1];
-    resize_w = size[0];
-  }
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (pad_h > 0 || pad_w > 0) {
-    float half_h = pad_h * 1.0 / 2;
-    int top = int(round(half_h - 0.1));
-    int bottom = int(round(half_h + 0.1));
-    float half_w = pad_w * 1.0 / 2;
-    int left = int(round(half_w - 0.1));
-    int right = int(round(half_w + 0.1));
-    Pad::Run(mat, top, bottom, left, right, color);
-  }
-}
-
-YOLOv5Face::YOLOv5Face(const std::string& model_file,
-                       const std::string& params_file,
-                       const RuntimeOption& custom_option,
-                       const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool YOLOv5Face::Initialize() {
-  // parameters for preprocess
-  size = {640, 640};
-  padding_value = {114.0, 114.0, 114.0};
-  is_mini_pad = false;
-  is_no_pad = false;
-  is_scale_up = false;
-  stride = 32;
-  landmarks_per_face = 5;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // Check if the input shape is dynamic after Runtime already initialized,
-  // Note that, We need to force is_mini_pad 'false' to keep static
-  // shape after padding (LetterBox) when the is_dynamic_input_ is 'false'.
-  is_dynamic_input_ = false;
-  auto shape = InputInfoOfRuntime(0).shape;
-  for (int i = 0; i < shape.size(); ++i) {
-    // if height or width is dynamic
-    if (i >= 2 && shape[i] <= 0) {
-      is_dynamic_input_ = true;
-      break;
-    }
-  }
-  if (!is_dynamic_input_) {
-    is_mini_pad = false;
-  }
-  return true;
-}
-
-bool YOLOv5Face::Preprocess(
-    Mat* mat, FDTensor* output,
-    std::map<std::string, std::array<float, 2>>* im_info) {
-  // process after image load
-  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
-                         size[0] * 1.0f / static_cast<float>(mat->Width()));
-  if (ratio != 1.0) {  // always true
-    int interp = cv::INTER_AREA;
-    if (ratio > 1.0) {
-      interp = cv::INTER_LINEAR;
-    }
-    int resize_h = int(round(static_cast<float>(mat->Height()) * ratio));
-    int resize_w = int(round(static_cast<float>(mat->Width()) * ratio));
-    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
-  }
-  // yolov5face's preprocess steps
-  // 1. letterbox
-  // 2. BGR->RGB
-  // 3. HWC->CHW
-  LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up,
-            stride);
-  BGR2RGB::Run(mat);
-  // Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
-  //                std::vector<float>(mat->Channels(), 1.0));
-  // Compute `result = mat * alpha + beta` directly by channel
-  std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
-  std::vector<float> beta = {0.0f, 0.0f, 0.0f};
-  Convert::Run(mat, alpha, beta);
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
-                                static_cast<float>(mat->Width())};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool YOLOv5Face::Postprocess(
-    FDTensor& infer_result, FaceDetectionResult* result,
-    const std::map<std::string, std::array<float, 2>>& im_info,
-    float conf_threshold, float nms_iou_threshold) {
-  // infer_result: (1,n,16) 16=4+1+10+1
-  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
-  if (infer_result.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-
-  result->Clear();
-  // must be setup landmarks_per_face before reserve
-  result->landmarks_per_face = landmarks_per_face;
-  result->Reserve(infer_result.shape[1]);
-
-  float* data = static_cast<float*>(infer_result.Data());
-  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
-    float* reg_cls_ptr = data + (i * infer_result.shape[2]);
-    float obj_conf = reg_cls_ptr[4];
-    float cls_conf = reg_cls_ptr[15];
-    float confidence = obj_conf * cls_conf;
-    // filter boxes by conf_threshold
-    if (confidence <= conf_threshold) {
-      continue;
-    }
-    float x = reg_cls_ptr[0];
-    float y = reg_cls_ptr[1];
-    float w = reg_cls_ptr[2];
-    float h = reg_cls_ptr[3];
-
-    // convert from [x, y, w, h] to [x1, y1, x2, y2]
-    result->boxes.emplace_back(std::array<float, 4>{
-        (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)});
-    result->scores.push_back(confidence);
-    // decode landmarks (default 5 landmarks)
-    if (landmarks_per_face > 0) {
-      float* landmarks_ptr = reg_cls_ptr + 5;
-      for (size_t j = 0; j < landmarks_per_face * 2; j += 2) {
-        result->landmarks.emplace_back(
-            std::array<float, 2>{landmarks_ptr[j], landmarks_ptr[j + 1]});
-      }
-    }
-  }
-
-  if (result->boxes.size() == 0) {
-    return true;
-  }
-
-  utils::NMS(result, nms_iou_threshold);
-
-  // scale the boxes to the origin image shape
-  auto iter_out = im_info.find("output_shape");
-  auto iter_ipt = im_info.find("input_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  float out_h = iter_out->second[0];
-  float out_w = iter_out->second[1];
-  float ipt_h = iter_ipt->second[0];
-  float ipt_w = iter_ipt->second[1];
-  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
-  float pad_h = (out_h - ipt_h * scale) / 2.f;
-  float pad_w = (out_w - ipt_w * scale) / 2.f;
-  if (is_mini_pad) {
-    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
-    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
-  }
-  // scale and clip box
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
-    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
-    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
-    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
-    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f);
-    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f);
-    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f);
-    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f);
-  }
-  // scale and clip landmarks
-  for (size_t i = 0; i < result->landmarks.size(); ++i) {
-    result->landmarks[i][0] =
-        std::max((result->landmarks[i][0] - pad_w) / scale, 0.0f);
-    result->landmarks[i][1] =
-        std::max((result->landmarks[i][1] - pad_h) / scale, 0.0f);
-    result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f);
-    result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f);
-  }
-  return true;
-}
-
-bool YOLOv5Face::Predict(cv::Mat* im, FaceDetectionResult* result,
-                         float conf_threshold, float nms_iou_threshold) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<float, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<float>(mat.Height()),
-                            static_cast<float>(mat.Width())};
-  im_info["output_shape"] = {static_cast<float>(mat.Height()),
-                             static_cast<float>(mat.Width())};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
-                   nms_iou_threshold)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h b/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h
deleted file mode 100644
index 017c9681a..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace facedet {
-
-class FASTDEPLOY_DECL YOLOv5Face : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  YOLOv5Face(const std::string& model_file, const std::string& params_file = "",
-             const RuntimeOption& custom_option = RuntimeOption(),
-             const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "yolov5-face"; }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  // conf_threshold 为后处理的参数
-  // nms_iou_threshold 为后处理的参数
-  virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
-                       float conf_threshold = 0.25,
-                       float nms_iou_threshold = 0.5);
-
-  // 以下为模型在预测时的一些参数，基本是前后处理所需
-  // 用户在创建模型后，可根据模型的要求，以及自己的需求
-  // 对参数进行修改
-  // tuple of (width, height)
-  std::vector<int> size;
-  // padding value, size should be same with Channels
-  std::vector<float> padding_value;
-  // only pad to the minimum rectange which height and width is times of stride
-  bool is_mini_pad;
-  // while is_mini_pad = false and is_no_pad = true, will resize the image to
-  // the set size
-  bool is_no_pad;
-  // if is_scale_up is false, the input image only can be zoom out, the maximum
-  // resize scale cannot exceed 1.0
-  bool is_scale_up;
-  // padding stride, for is_mini_pad
-  int stride;
-  // setup the number of landmarks for per face (if have), default 5 in
-  // official yolov5face note that, the outupt tensor's shape must be:
-  // (1,n,4+1+2*landmarks_per_face+1=box+obj+landmarks+cls)
-  int landmarks_per_face;
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  // im_info为预处理过程保存的数据，在后处理中需要用到
-  bool Preprocess(Mat* mat, FDTensor* outputs,
-                  std::map<std::string, std::array<float, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  // im_info 为预处理记录的信息，后处理用于还原box
-  // conf_threshold 后处理时过滤box的置信度阈值
-  // nms_iou_threshold 后处理时NMS设定的iou阈值
-  bool Postprocess(FDTensor& infer_result, FaceDetectionResult* result,
-                   const std::map<std::string, std::array<float, 2>>& im_info,
-                   float conf_threshold, float nms_iou_threshold);
-
-  // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致
-  bool IsDynamicInput() const { return is_dynamic_input_; }
-
-  bool is_dynamic_input_;
-};
-
-}  // namespace facedet
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc
deleted file mode 100644
index b843d4a9f..000000000
--- a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindYOLOv5Face(pybind11::module& m) {
-  pybind11::class_<vision::facedet::YOLOv5Face, FastDeployModel>(m,
-                                                                 "YOLOv5Face")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::facedet::YOLOv5Face& self, pybind11::array& data,
-              float conf_threshold, float nms_iou_threshold) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceDetectionResult res;
-             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
-             return res;
-           })
-      .def_readwrite("size", &vision::facedet::YOLOv5Face::size)
-      .def_readwrite("padding_value",
-                     &vision::facedet::YOLOv5Face::padding_value)
-      .def_readwrite("is_mini_pad", &vision::facedet::YOLOv5Face::is_mini_pad)
-      .def_readwrite("is_no_pad", &vision::facedet::YOLOv5Face::is_no_pad)
-      .def_readwrite("is_scale_up", &vision::facedet::YOLOv5Face::is_scale_up)
-      .def_readwrite("stride", &vision::facedet::YOLOv5Face::stride)
-      .def_readwrite("landmarks_per_face",
-                     &vision::facedet::YOLOv5Face::landmarks_per_face);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/facedet/facedet_pybind.cc b/csrcs/fastdeploy/vision/facedet/facedet_pybind.cc
deleted file mode 100644
index 3d9a812af..000000000
--- a/csrcs/fastdeploy/vision/facedet/facedet_pybind.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-
-void BindRetinaFace(pybind11::module& m);
-void BindUltraFace(pybind11::module& m);
-void BindYOLOv5Face(pybind11::module& m);
-void BindSCRFD(pybind11::module& m);
-
-void BindFaceDet(pybind11::module& m) {
-  auto facedet_module = m.def_submodule("facedet", "Face detection models.");
-  BindRetinaFace(facedet_module);
-  BindUltraFace(facedet_module);
-  BindYOLOv5Face(facedet_module);
-  BindSCRFD(facedet_module);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/arcface.cc b/csrcs/fastdeploy/vision/faceid/contrib/arcface.cc
deleted file mode 100644
index 9c2b64763..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/arcface.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/faceid/contrib/arcface.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-ArcFace::ArcFace(const std::string& model_file, const std::string& params_file,
-                 const RuntimeOption& custom_option,
-                 const Frontend& model_format)
-    : InsightFaceRecognitionModel(model_file, params_file, custom_option,
-                                  model_format) {
-  initialized = Initialize();
-}
-
-bool ArcFace::Initialize() {
-  // 如果初始化有变化 修改该子类函数
-  // 这里需要判断backend是否已经initialized，如果是，则不应该再调用
-  // InsightFaceRecognitionModel::Initialize()
-  // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化
-  // 这里只修改一些模型相关的属性
-
-  // (1) 如果父类初始化了backend
-  if (initialized) {
-    // (1.1) re-init parameters for specific sub-classes
-    size = {112, 112};
-    alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-    beta = {-1.f, -1.f, -1.f};  // RGB
-    swap_rb = true;
-    l2_normalize = false;
-    return true;
-  }
-  // (2) 如果父类没有初始化backend
-  if (!InsightFaceRecognitionModel::Initialize()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // (2.1) re-init parameters for specific sub-classes
-  size = {112, 112};
-  alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-  beta = {-1.f, -1.f, -1.f};  // RGB
-  swap_rb = true;
-  l2_normalize = false;
-  return true;
-}
-
-bool ArcFace::Preprocess(Mat* mat, FDTensor* output) {
-  // 如果预处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Preprocess(mat, output);
-}
-
-bool ArcFace::Postprocess(std::vector<FDTensor>& infer_result,
-                          FaceRecognitionResult* result) {
-  // 如果后处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Postprocess(infer_result, result);
-}
-
-bool ArcFace::Predict(cv::Mat* im, FaceRecognitionResult* result) {
-  // 如果前后处理有变化 则override子类的Preprocess和Postprocess
-  // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess
-  return InsightFaceRecognitionModel::Predict(im, result);
-}
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/arcface.h b/csrcs/fastdeploy/vision/faceid/contrib/arcface.h
deleted file mode 100644
index 698fadceb..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/arcface.h
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-class FASTDEPLOY_DECL ArcFace : public InsightFaceRecognitionModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  // ArcFace支持IResNet, IResNet2060, VIT, MobileFaceNet骨干
-  ArcFace(const std::string& model_file, const std::string& params_file = "",
-          const RuntimeOption& custom_option = RuntimeOption(),
-          const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const override {
-    return "deepinsight/insightface/recognition/arcface_pytorch";
-  }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  bool Predict(cv::Mat* im, FaceRecognitionResult* result) override;
-  // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize() override;
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  bool Preprocess(Mat* mat, FDTensor* output) override;
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  bool Postprocess(std::vector<FDTensor>& infer_result,
-                   FaceRecognitionResult* result) override;
-};
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc
deleted file mode 100644
index cd9bf7c57..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindArcFace(pybind11::module& m) {
-  // Bind ArcFace
-  pybind11::class_<vision::faceid::ArcFace,
-                   vision::faceid::InsightFaceRecognitionModel>(m, "ArcFace")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::faceid::ArcFace& self, pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceRecognitionResult res;
-             self.Predict(&mat, &res);
-             return res;
-           })
-      .def_readwrite("size", &vision::faceid::ArcFace::size)
-      .def_readwrite("alpha", &vision::faceid::ArcFace::alpha)
-      .def_readwrite("beta", &vision::faceid::ArcFace::beta)
-      .def_readwrite("swap_rb", &vision::faceid::ArcFace::swap_rb)
-      .def_readwrite("l2_normalize", &vision::faceid::ArcFace::l2_normalize);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/cosface.cc b/csrcs/fastdeploy/vision/faceid/contrib/cosface.cc
deleted file mode 100644
index 4a4d6dc55..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/cosface.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/faceid/contrib/cosface.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-CosFace::CosFace(const std::string& model_file, const std::string& params_file,
-                 const RuntimeOption& custom_option,
-                 const Frontend& model_format)
-    : InsightFaceRecognitionModel(model_file, params_file, custom_option,
-                                  model_format) {
-  initialized = Initialize();
-}
-
-bool CosFace::Initialize() {
-  // 如果初始化有变化 修改该子类函数
-  // 这里需要判断backend是否已经initialized，如果是，则不应该再调用
-  // InsightFaceRecognitionModel::Initialize()
-  // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化
-  // 这里只修改一些模型相关的属性
-
-  // (1) 如果父类初始化了backend
-  if (initialized) {
-    // (1.1) re-init parameters for specific sub-classes
-    size = {112, 112};
-    alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-    beta = {-1.f, -1.f, -1.f};  // RGB
-    swap_rb = true;
-    l2_normalize = false;
-    return true;
-  }
-  // (2) 如果父类没有初始化backend
-  if (!InsightFaceRecognitionModel::Initialize()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // (2.1) re-init parameters for specific sub-classes
-  size = {112, 112};
-  alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-  beta = {-1.f, -1.f, -1.f};  // RGB
-  swap_rb = true;
-  l2_normalize = false;
-  return true;
-}
-
-bool CosFace::Preprocess(Mat* mat, FDTensor* output) {
-  // 如果预处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Preprocess(mat, output);
-}
-
-bool CosFace::Postprocess(std::vector<FDTensor>& infer_result,
-                          FaceRecognitionResult* result) {
-  // 如果后处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Postprocess(infer_result, result);
-}
-
-bool CosFace::Predict(cv::Mat* im, FaceRecognitionResult* result) {
-  // 如果前后处理有变化 则override子类的Preprocess和Postprocess
-  // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess
-  return InsightFaceRecognitionModel::Predict(im, result);
-}
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/cosface.h b/csrcs/fastdeploy/vision/faceid/contrib/cosface.h
deleted file mode 100644
index 92704536c..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/cosface.h
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-class FASTDEPLOY_DECL CosFace : public InsightFaceRecognitionModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  // ArcFace支持IResNet, IResNet2060, VIT, MobileFaceNet骨干
-  CosFace(const std::string& model_file, const std::string& params_file = "",
-          const RuntimeOption& custom_option = RuntimeOption(),
-          const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  // insightface/arcface提供的模型文件包含了cosface
-  std::string ModelName() const override {
-    return "deepinsight/insightface/recognition/arcface_pytorch";
-  }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  bool Predict(cv::Mat* im, FaceRecognitionResult* result) override;
-  // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize() override;
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  bool Preprocess(Mat* mat, FDTensor* output) override;
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  bool Postprocess(std::vector<FDTensor>& infer_result,
-                   FaceRecognitionResult* result) override;
-};
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc
deleted file mode 100644
index c09f9e723..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindCosFace(pybind11::module& m) {
-  // Bind CosFace
-  pybind11::class_<vision::faceid::CosFace,
-                   vision::faceid::InsightFaceRecognitionModel>(m, "CosFace")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::faceid::CosFace& self, pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceRecognitionResult res;
-             self.Predict(&mat, &res);
-             return res;
-           })
-      .def_readwrite("size", &vision::faceid::CosFace::size)
-      .def_readwrite("alpha", &vision::faceid::CosFace::alpha)
-      .def_readwrite("beta", &vision::faceid::CosFace::beta)
-      .def_readwrite("swap_rb", &vision::faceid::CosFace::swap_rb)
-      .def_readwrite("l2_normalize", &vision::faceid::CosFace::l2_normalize);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc b/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc
deleted file mode 100644
index ddd7520d4..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-InsightFaceRecognitionModel::InsightFaceRecognitionModel(
-    const std::string& model_file, const std::string& params_file,
-    const RuntimeOption& custom_option, const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool InsightFaceRecognitionModel::Initialize() {
-  // parameters for preprocess
-  size = {112, 112};
-  alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-  beta = {-1.f, -1.f, -1.f};  // RGB
-  swap_rb = true;
-  l2_normalize = false;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  return true;
-}
-
-bool InsightFaceRecognitionModel::Preprocess(Mat* mat, FDTensor* output) {
-  // face recognition model's preprocess steps in insightface
-  // reference: insightface/recognition/arcface_torch/inference.py
-  // 1. Resize
-  // 2. BGR2RGB
-  // 3. Convert(opencv style) or Normalize
-  // 4. HWC2CHW
-  int resize_w = size[0];
-  int resize_h = size[1];
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (swap_rb) {
-    BGR2RGB::Run(mat);
-  }
-
-  Convert::Run(mat, alpha, beta);
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool InsightFaceRecognitionModel::Postprocess(
-    std::vector<FDTensor>& infer_result, FaceRecognitionResult* result) {
-  FDASSERT((infer_result.size() == 1),
-           "The default number of output tensor must be 1 according to "
-           "insightface.");
-  FDTensor& embedding_tensor = infer_result.at(0);
-  FDASSERT((embedding_tensor.shape[0] == 1), "Only support batch =1 now.");
-  if (embedding_tensor.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-
-  result->Clear();
-  result->Resize(embedding_tensor.Numel());
-  // Copy the raw embedding vector directly without L2 normalize
-  // post process. Let the user decide whether to normalize or not.
-  // Will call utils::L2Normlize() method to perform L2
-  // normalize if l2_normalize was set as 'true'.
-  std::memcpy(result->embedding.data(), embedding_tensor.Data(),
-              embedding_tensor.Nbytes());
-  if (l2_normalize) {
-    auto norm_embedding = utils::L2Normalize(result->embedding);
-    std::memcpy(result->embedding.data(), norm_embedding.data(),
-                embedding_tensor.Nbytes());
-  }
-  return true;
-}
-
-bool InsightFaceRecognitionModel::Predict(cv::Mat* im,
-                                          FaceRecognitionResult* result) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  if (!Preprocess(&mat, &input_tensors[0])) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors, result)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h b/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h
deleted file mode 100644
index b8eb27262..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-class FASTDEPLOY_DECL InsightFaceRecognitionModel : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  // 支持insightface/recognition人脸识别模型的基类
-  InsightFaceRecognitionModel(
-      const std::string& model_file, const std::string& params_file = "",
-      const RuntimeOption& custom_option = RuntimeOption(),
-      const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  virtual std::string ModelName() const { return "deepinsight/insightface"; }
-
-  // 以下为一些可供用户修改的属性
-  // tuple of (width, height), default (112, 112)
-  std::vector<int> size;
-  // 归一化的 alpha 和 beta，x'=x*alpha+beta
-  std::vector<float> alpha;
-  std::vector<float> beta;
-  // whether to swap the B and R channel, such as BGR->RGB, default true.
-  bool swap_rb;
-  // whether to apply l2 normalize to embedding values, default;
-  bool l2_normalize;
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  virtual bool Predict(cv::Mat* im, FaceRecognitionResult* result);
-
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  virtual bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  virtual bool Preprocess(Mat* mat, FDTensor* output);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  virtual bool Postprocess(std::vector<FDTensor>& infer_result,
-                           FaceRecognitionResult* result);
-};
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc
deleted file mode 100644
index 78df369bb..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindInsightFaceRecognitionModel(pybind11::module& m) {
-  // Bind InsightFaceRecognitionModel
-  pybind11::class_<vision::faceid::InsightFaceRecognitionModel,
-                   FastDeployModel>(m, "InsightFaceRecognitionModel")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::faceid::InsightFaceRecognitionModel& self,
-              pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceRecognitionResult res;
-             self.Predict(&mat, &res);
-             return res;
-           })
-      .def_readwrite("size", &vision::faceid::InsightFaceRecognitionModel::size)
-      .def_readwrite("alpha",
-                     &vision::faceid::InsightFaceRecognitionModel::alpha)
-      .def_readwrite("beta", &vision::faceid::InsightFaceRecognitionModel::beta)
-      .def_readwrite("swap_rb",
-                     &vision::faceid::InsightFaceRecognitionModel::swap_rb)
-      .def_readwrite(
-          "l2_normalize",
-          &vision::faceid::InsightFaceRecognitionModel::l2_normalize);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc b/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc
deleted file mode 100644
index 8f13226cb..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/faceid/contrib/partial_fc.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-PartialFC::PartialFC(const std::string& model_file,
-                     const std::string& params_file,
-                     const RuntimeOption& custom_option,
-                     const Frontend& model_format)
-    : InsightFaceRecognitionModel(model_file, params_file, custom_option,
-                                  model_format) {
-  initialized = Initialize();
-}
-
-bool PartialFC::Initialize() {
-  // 如果初始化有变化 修改该子类函数
-  // 这里需要判断backend是否已经initialized，如果是，则不应该再调用
-  // InsightFaceRecognitionModel::Initialize()
-  // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化
-  // 这里只修改一些模型相关的属性
-
-  // (1) 如果父类初始化了backend
-  if (initialized) {
-    // (1.1) re-init parameters for specific sub-classes
-    size = {112, 112};
-    alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-    beta = {-1.f, -1.f, -1.f};  // RGB
-    swap_rb = true;
-    l2_normalize = false;
-    return true;
-  }
-  // (2) 如果父类没有初始化backend
-  if (!InsightFaceRecognitionModel::Initialize()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // (2.1) re-init parameters for specific sub-classes
-  size = {112, 112};
-  alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-  beta = {-1.f, -1.f, -1.f};  // RGB
-  swap_rb = true;
-  l2_normalize = false;
-  return true;
-}
-
-bool PartialFC::Preprocess(Mat* mat, FDTensor* output) {
-  // 如果预处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Preprocess(mat, output);
-}
-
-bool PartialFC::Postprocess(std::vector<FDTensor>& infer_result,
-                            FaceRecognitionResult* result) {
-  // 如果后处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Postprocess(infer_result, result);
-}
-
-bool PartialFC::Predict(cv::Mat* im, FaceRecognitionResult* result) {
-  // 如果前后处理有变化 则override子类的Preprocess和Postprocess
-  // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess
-  return InsightFaceRecognitionModel::Predict(im, result);
-}
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h b/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h
deleted file mode 100644
index 88a1f2a2a..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-class FASTDEPLOY_DECL PartialFC : public InsightFaceRecognitionModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  PartialFC(const std::string& model_file, const std::string& params_file = "",
-            const RuntimeOption& custom_option = RuntimeOption(),
-            const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const override {
-    return "deepinsight/insightface/recognition/partial_fc";
-  }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  bool Predict(cv::Mat* im, FaceRecognitionResult* result) override;
-  // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize() override;
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  bool Preprocess(Mat* mat, FDTensor* output) override;
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  bool Postprocess(std::vector<FDTensor>& infer_result,
-                   FaceRecognitionResult* result) override;
-};
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc
deleted file mode 100644
index b8cb31358..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindPartialFC(pybind11::module& m) {
-  // Bind Partial FC
-  pybind11::class_<vision::faceid::PartialFC,
-                   vision::faceid::InsightFaceRecognitionModel>(m, "PartialFC")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::faceid::PartialFC& self, pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceRecognitionResult res;
-             self.Predict(&mat, &res);
-             return res;
-           })
-      .def_readwrite("size", &vision::faceid::PartialFC::size)
-      .def_readwrite("alpha", &vision::faceid::PartialFC::alpha)
-      .def_readwrite("beta", &vision::faceid::PartialFC::beta)
-      .def_readwrite("swap_rb", &vision::faceid::PartialFC::swap_rb)
-      .def_readwrite("l2_normalize", &vision::faceid::PartialFC::l2_normalize);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/vpl.cc b/csrcs/fastdeploy/vision/faceid/contrib/vpl.cc
deleted file mode 100644
index bb34d3993..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/vpl.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/faceid/contrib/vpl.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-VPL::VPL(const std::string& model_file, const std::string& params_file,
-         const RuntimeOption& custom_option, const Frontend& model_format)
-    : InsightFaceRecognitionModel(model_file, params_file, custom_option,
-                                  model_format) {
-  initialized = Initialize();
-}
-
-bool VPL::Initialize() {
-  // 如果初始化有变化 修改该子类函数
-  // 这里需要判断backend是否已经initialized，如果是，则不应该再调用
-  // InsightFaceRecognitionModel::Initialize()
-  // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化
-  // 这里只修改一些模型相关的属性
-
-  // (1) 如果父类初始化了backend
-  if (initialized) {
-    // (1.1) re-init parameters for specific sub-classes
-    size = {112, 112};
-    alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-    beta = {-1.f, -1.f, -1.f};  // RGB
-    swap_rb = true;
-    l2_normalize = false;
-    return true;
-  }
-  // (2) 如果父类没有初始化backend
-  if (!InsightFaceRecognitionModel::Initialize()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  // (2.1) re-init parameters for specific sub-classes
-  size = {112, 112};
-  alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-  beta = {-1.f, -1.f, -1.f};  // RGB
-  swap_rb = true;
-  l2_normalize = false;
-  return true;
-}
-
-bool VPL::Preprocess(Mat* mat, FDTensor* output) {
-  // 如果预处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Preprocess(mat, output);
-}
-
-bool VPL::Postprocess(std::vector<FDTensor>& infer_result,
-                      FaceRecognitionResult* result) {
-  // 如果后处理有变化 修改该子类函数
-  return InsightFaceRecognitionModel::Postprocess(infer_result, result);
-}
-
-bool VPL::Predict(cv::Mat* im, FaceRecognitionResult* result) {
-  // 如果前后处理有变化 则override子类的Preprocess和Postprocess
-  // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess
-  return InsightFaceRecognitionModel::Predict(im, result);
-}
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/vpl.h b/csrcs/fastdeploy/vision/faceid/contrib/vpl.h
deleted file mode 100644
index 696d13ac3..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/vpl.h
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace faceid {
-
-class FASTDEPLOY_DECL VPL : public InsightFaceRecognitionModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  // VPL支持IResNet, IResNet1024骨干
-  VPL(const std::string& model_file, const std::string& params_file = "",
-      const RuntimeOption& custom_option = RuntimeOption(),
-      const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const override {
-    return "deepinsight/insightface/recognition/vpl";
-  }
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  bool Predict(cv::Mat* im, FaceRecognitionResult* result) override;
-  // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize() override;
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  bool Preprocess(Mat* mat, FDTensor* output) override;
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  bool Postprocess(std::vector<FDTensor>& infer_result,
-                   FaceRecognitionResult* result) override;
-};
-
-}  // namespace faceid
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc
deleted file mode 100644
index 448cf3d3b..000000000
--- a/csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindVPL(pybind11::module& m) {
-  // Bind VPL
-  pybind11::class_<vision::faceid::VPL,
-                   vision::faceid::InsightFaceRecognitionModel>(m, "VPL")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::faceid::VPL& self, pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::FaceRecognitionResult res;
-             self.Predict(&mat, &res);
-             return res;
-           })
-      .def_readwrite("size", &vision::faceid::VPL::size)
-      .def_readwrite("alpha", &vision::faceid::VPL::alpha)
-      .def_readwrite("beta", &vision::faceid::VPL::beta)
-      .def_readwrite("swap_rb", &vision::faceid::VPL::swap_rb)
-      .def_readwrite("l2_normalize", &vision::faceid::VPL::l2_normalize);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/faceid/faceid_pybind.cc b/csrcs/fastdeploy/vision/faceid/faceid_pybind.cc
deleted file mode 100644
index 40a1c6727..000000000
--- a/csrcs/fastdeploy/vision/faceid/faceid_pybind.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-
-void BindArcFace(pybind11::module& m);
-void BindInsightFaceRecognitionModel(pybind11::module& m);
-void BindCosFace(pybind11::module& m);
-void BindPartialFC(pybind11::module& m);
-void BindVPL(pybind11::module& m);
-
-void BindFaceId(pybind11::module& m) {
-  auto faceid_module = m.def_submodule("faceid", "Face recognition models.");
-  BindInsightFaceRecognitionModel(faceid_module);
-  BindArcFace(faceid_module);
-  BindCosFace(faceid_module);
-  BindPartialFC(faceid_module);
-  BindVPL(faceid_module);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/matting/contrib/modnet.cc b/csrcs/fastdeploy/vision/matting/contrib/modnet.cc
deleted file mode 100644
index b98d055e3..000000000
--- a/csrcs/fastdeploy/vision/matting/contrib/modnet.cc
+++ /dev/null
@@ -1,175 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/matting/contrib/modnet.h"
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace matting {
-
-MODNet::MODNet(const std::string& model_file, const std::string& params_file,
-               const RuntimeOption& custom_option,
-               const Frontend& model_format) {
-  if (model_format == Frontend::ONNX) {
-    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
-  } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
-  }
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool MODNet::Initialize() {
-  // parameters for preprocess
-  size = {256, 256};
-  alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
-  beta = {-1.f, -1.f, -1.f};  // RGB
-  swap_rb = true;
-
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  return true;
-}
-
-bool MODNet::Preprocess(Mat* mat, FDTensor* output,
-                        std::map<std::string, std::array<int, 2>>* im_info) {
-  // 1. Resize
-  // 2. BGR2RGB
-  // 3. Convert(opencv style) or Normalize
-  // 4. HWC2CHW
-  int resize_w = size[0];
-  int resize_h = size[1];
-  if (resize_h != mat->Height() || resize_w != mat->Width()) {
-    Resize::Run(mat, resize_w, resize_h);
-  }
-  if (swap_rb) {
-    BGR2RGB::Run(mat);
-  }
-
-  Convert::Run(mat, alpha, beta);
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {mat->Height(), mat->Width()};
-
-  HWC2CHW::Run(mat);
-  Cast::Run(mat, "float");
-
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);  // reshape to n, h, w, c
-  return true;
-}
-
-bool MODNet::Postprocess(
-    std::vector<FDTensor>& infer_result, MattingResult* result,
-    const std::map<std::string, std::array<int, 2>>& im_info) {
-  FDASSERT((infer_result.size() == 1),
-           "The default number of output tensor must be 1 according to "
-           "modnet.");
-  FDTensor& alpha_tensor = infer_result.at(0);  // (1,h,w,1)
-  FDASSERT((alpha_tensor.shape[0] == 1), "Only support batch =1 now.");
-  if (alpha_tensor.dtype != FDDataType::FP32) {
-    FDERROR << "Only support post process with float32 data." << std::endl;
-    return false;
-  }
-
-  // 先获取alpha并resize (使用opencv)
-  auto iter_ipt = im_info.find("input_shape");
-  auto iter_out = im_info.find("output_shape");
-  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
-           "Cannot find input_shape or output_shape from im_info.");
-  int out_h = iter_out->second[0];
-  int out_w = iter_out->second[1];
-  int ipt_h = iter_ipt->second[0];
-  int ipt_w = iter_ipt->second[1];
-
-  // TODO: 需要修改成FDTensor或Mat的运算 现在依赖cv::Mat
-  float* alpha_ptr = static_cast<float*>(alpha_tensor.Data());
-  cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr);
-  Mat alpha_resized(alpha_zero_copy_ref);  // ref-only, zero copy.
-  if ((out_h != ipt_h) || (out_w != ipt_w)) {
-    // already allocated a new continuous memory after resize.
-    // cv::resize(alpha_resized, alpha_resized, cv::Size(ipt_w, ipt_h));
-    Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1);
-  }
-
-  result->Clear();
-  // note: must be setup shape before Resize
-  result->contain_foreground = false;
-  // 和输入原图大小对应的alpha
-  result->shape = {static_cast<int64_t>(ipt_h), static_cast<int64_t>(ipt_w)};
-  int numel = ipt_h * ipt_w;
-  int nbytes = numel * sizeof(float);
-  result->Resize(numel);
-  std::memcpy(result->alpha.data(), alpha_resized.GetCpuMat()->data, nbytes);
-  return true;
-}
-
-bool MODNet::Predict(cv::Mat* im, MattingResult* result) {
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_START(0)
-#endif
-
-  Mat mat(*im);
-  std::vector<FDTensor> input_tensors(1);
-
-  std::map<std::string, std::array<int, 2>> im_info;
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {mat.Height(), mat.Width()};
-  im_info["output_shape"] = {mat.Height(), mat.Width()};
-
-  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
-    FDERROR << "Failed to preprocess input image." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(0, "Preprocess")
-  TIMERECORD_START(1)
-#endif
-
-  input_tensors[0].name = InputInfoOfRuntime(0).name;
-  std::vector<FDTensor> output_tensors;
-  if (!Infer(input_tensors, &output_tensors)) {
-    FDERROR << "Failed to inference." << std::endl;
-    return false;
-  }
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(1, "Inference")
-  TIMERECORD_START(2)
-#endif
-
-  if (!Postprocess(output_tensors, result, im_info)) {
-    FDERROR << "Failed to post process." << std::endl;
-    return false;
-  }
-
-#ifdef FASTDEPLOY_DEBUG
-  TIMERECORD_END(2, "Postprocess")
-#endif
-  return true;
-}
-
-}  // namespace matting
-}  // namespace vision
-}  // namespace fastdeploy
\ No newline at end of file
diff --git a/csrcs/fastdeploy/vision/matting/contrib/modnet.h b/csrcs/fastdeploy/vision/matting/contrib/modnet.h
deleted file mode 100644
index 047fd3aea..000000000
--- a/csrcs/fastdeploy/vision/matting/contrib/modnet.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-
-namespace vision {
-
-namespace matting {
-
-class FASTDEPLOY_DECL MODNet : public FastDeployModel {
- public:
-  // 当model_format为ONNX时，无需指定params_file
-  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  MODNet(const std::string& model_file, const std::string& params_file = "",
-         const RuntimeOption& custom_option = RuntimeOption(),
-         const Frontend& model_format = Frontend::ONNX);
-
-  // 定义模型的名称
-  std::string ModelName() const { return "matting/MODNet"; }
-
-  // 以下为一些可供用户修改的属性
-  // tuple of (width, height), default (256, 256)
-  std::vector<int> size;
-  // 归一化的 alpha 和 beta，x'=x*alpha+beta
-  std::vector<float> alpha;
-  std::vector<float> beta;
-  // whether to swap the B and R channel, such as BGR->RGB, default true.
-  bool swap_rb;
-
-  // 模型预测接口，即用户调用的接口
-  // im 为用户的输入数据，目前对于CV均定义为cv::Mat
-  // result 为模型预测的输出结构体
-  bool Predict(cv::Mat* im, MattingResult* result);
-
- private:
-  // 初始化函数，包括初始化后端，以及其它模型推理需要涉及的操作
-  bool Initialize();
-
-  // 输入图像预处理操作
-  // Mat为FastDeploy定义的数据结构
-  // FDTensor为预处理后的Tensor数据，传给后端进行推理
-  bool Preprocess(Mat* mat, FDTensor* output,
-                  std::map<std::string, std::array<int, 2>>* im_info);
-
-  // 后端推理结果后处理，输出给用户
-  // infer_result 为后端推理后的输出Tensor
-  // result 为模型预测的结果
-  bool Postprocess(std::vector<FDTensor>& infer_result, MattingResult* result,
-                   const std::map<std::string, std::array<int, 2>>& im_info);
-};
-
-}  // namespace matting
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc b/csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc
deleted file mode 100644
index bfb8b1f88..000000000
--- a/csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindMODNet(pybind11::module& m) {
-  // Bind MODNet
-  pybind11::class_<vision::matting::MODNet, FastDeployModel>(m, "MODNet")
-      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
-      .def("predict",
-           [](vision::matting::MODNet& self, pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::MattingResult res;
-             self.Predict(&mat, &res);
-             return res;
-           })
-      .def_readwrite("size", &vision::matting::MODNet::size)
-      .def_readwrite("alpha", &vision::matting::MODNet::alpha)
-      .def_readwrite("beta", &vision::matting::MODNet::beta)
-      .def_readwrite("swap_rb", &vision::matting::MODNet::swap_rb);
-}
-
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/matting/matting_pybind.cc b/csrcs/fastdeploy/vision/matting/matting_pybind.cc
deleted file mode 100644
index e5fd78925..000000000
--- a/csrcs/fastdeploy/vision/matting/matting_pybind.cc
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-
-void BindMODNet(pybind11::module& m);
-
-void BindMatting(pybind11::module& m) {
-  auto matting_module =
-      m.def_submodule("matting", "Image object matting models.");
-  BindMODNet(matting_module);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/ppcls/model.cc b/csrcs/fastdeploy/vision/ppcls/model.cc
deleted file mode 100644
index c4e5b767c..000000000
--- a/csrcs/fastdeploy/vision/ppcls/model.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/ppcls/model.h"
-#include "fastdeploy/vision/utils/utils.h"
-#include "yaml-cpp/yaml.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace ppcls {
-
-Model::Model(const std::string& model_file, const std::string& params_file,
-             const std::string& config_file, const RuntimeOption& custom_option,
-             const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
-  valid_gpu_backends = {Backend::ORT, Backend::PDINFER};
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool Model::Initialize() {
-  if (!BuildPreprocessPipelineFromConfig()) {
-    FDERROR << "Failed to build preprocess pipeline from configuration file."
-            << std::endl;
-    return false;
-  }
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  return true;
-}
-
-bool Model::BuildPreprocessPipelineFromConfig() {
-  processors_.clear();
-  YAML::Node cfg;
-  try {
-    cfg = YAML::LoadFile(config_file_);
-  } catch (YAML::BadFile& e) {
-    FDERROR << "Failed to load yaml file " << config_file_
-            << ", maybe you should check this file." << std::endl;
-    return false;
-  }
-  auto preprocess_cfg = cfg["PreProcess"]["transform_ops"];
-  processors_.push_back(std::make_shared<BGR2RGB>());
-  for (const auto& op : preprocess_cfg) {
-    FDASSERT(op.IsMap(),
-             "Require the transform information in yaml be Map type.");
-    auto op_name = op.begin()->first.as<std::string>();
-    if (op_name == "ResizeImage") {
-      int target_size = op.begin()->second["resize_short"].as<int>();
-      bool use_scale = false;
-      int interp = 1;
-      processors_.push_back(
-          std::make_shared<ResizeByShort>(target_size, 1, use_scale));
-    } else if (op_name == "CropImage") {
-      int width = op.begin()->second["size"].as<int>();
-      int height = op.begin()->second["size"].as<int>();
-      processors_.push_back(std::make_shared<CenterCrop>(width, height));
-    } else if (op_name == "NormalizeImage") {
-      auto mean = op.begin()->second["mean"].as<std::vector<float>>();
-      auto std = op.begin()->second["std"].as<std::vector<float>>();
-      auto scale = op.begin()->second["scale"].as<float>();
-      FDASSERT((scale - 0.00392157) < 1e-06 && (scale - 0.00392157) > -1e-06,
-               "Only support scale in Normalize be 0.00392157, means the pixel "
-               "is in range of [0, 255].");
-      processors_.push_back(std::make_shared<Normalize>(mean, std));
-    } else if (op_name == "ToCHWImage") {
-      processors_.push_back(std::make_shared<HWC2CHW>());
-    } else {
-      FDERROR << "Unexcepted preprocess operator: " << op_name << "."
-              << std::endl;
-      return false;
-    }
-  }
-  return true;
-}
-
-bool Model::Preprocess(Mat* mat, FDTensor* output) {
-  for (size_t i = 0; i < processors_.size(); ++i) {
-    if (!(*(processors_[i].get()))(mat)) {
-      FDERROR << "Failed to process image data in " << processors_[i]->Name()
-              << "." << std::endl;
-      return false;
-    }
-  }
-
-  int channel = mat->Channels();
-  int width = mat->Width();
-  int height = mat->Height();
-  output->name = InputInfoOfRuntime(0).name;
-  output->SetExternalData({1, channel, height, width}, FDDataType::FP32,
-                          mat->GetCpuMat()->ptr());
-  return true;
-}
-
-bool Model::Postprocess(const FDTensor& infer_result, ClassifyResult* result,
-                        int topk) {
-  int num_classes = infer_result.shape[1];
-  const float* infer_result_buffer =
-      reinterpret_cast<const float*>(infer_result.data.data());
-  topk = std::min(num_classes, topk);
-  result->label_ids =
-      utils::TopKIndices(infer_result_buffer, num_classes, topk);
-  result->scores.resize(topk);
-  for (int i = 0; i < topk; ++i) {
-    result->scores[i] = *(infer_result_buffer + result->label_ids[i]);
-  }
-  return true;
-}
-
-bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
-  Mat mat(*im);
-  std::vector<FDTensor> processed_data(1);
-  if (!Preprocess(&mat, &(processed_data[0]))) {
-    FDERROR << "Failed to preprocess input data while using model:"
-            << ModelName() << "." << std::endl;
-    return false;
-  }
-
-  std::vector<FDTensor> infer_result(1);
-  if (!Infer(processed_data, &infer_result)) {
-    FDERROR << "Failed to inference while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-
-  if (!Postprocess(infer_result[0], result, topk)) {
-    FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-} // namespace ppcls
-} // namespace vision
-} // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/ppcls/model.h b/csrcs/fastdeploy/vision/ppcls/model.h
deleted file mode 100644
index 71800a7d7..000000000
--- a/csrcs/fastdeploy/vision/ppcls/model.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace ppcls {
-
-class FASTDEPLOY_DECL Model : public FastDeployModel {
- public:
-  Model(const std::string& model_file, const std::string& params_file,
-        const std::string& config_file,
-        const RuntimeOption& custom_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::PADDLE);
-
-  std::string ModelName() const { return "ppclas-classify"; }
-
-  // TODO(jiangjiajun) Batch is on the way
-  virtual bool Predict(cv::Mat* im, ClassifyResult* result, int topk = 1);
-
- private:
-  bool Initialize();
-
-  bool BuildPreprocessPipelineFromConfig();
-
-  bool Preprocess(Mat* mat, FDTensor* outputs);
-
-  bool Postprocess(const FDTensor& infer_result, ClassifyResult* result,
-                   int topk = 1);
-
-  std::vector<std::shared_ptr<Processor>> processors_;
-  std::string config_file_;
-};
-}  // namespace ppcls
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc b/csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc
deleted file mode 100644
index 10ff5ee10..000000000
--- a/csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindPPCls(pybind11::module& m) {
-  auto ppcls_module = m.def_submodule("ppcls", "Module to deploy PaddleClas.");
-  pybind11::class_<vision::ppcls::Model, FastDeployModel>(ppcls_module, "Model")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict",
-           [](vision::ppcls::Model& self, pybind11::array& data, int topk = 1) {
-             auto mat = PyArrayToCvMat(data);
-             vision::ClassifyResult res;
-             self.Predict(&mat, &res, topk);
-             return res;
-           });
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc
deleted file mode 100644
index 7f692c6a7..000000000
--- a/csrcs/fastdeploy/vision/ppseg/model.cc
+++ /dev/null
@@ -1,232 +0,0 @@
-#include "fastdeploy/vision/ppseg/model.h"
-#include "fastdeploy/vision.h"
-#include "fastdeploy/vision/utils/utils.h"
-#include "yaml-cpp/yaml.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace ppseg {
-
-Model::Model(const std::string& model_file, const std::string& params_file,
-             const std::string& config_file, const RuntimeOption& custom_option,
-             const Frontend& model_format) {
-  config_file_ = config_file;
-  valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
-  valid_gpu_backends = {Backend::PDINFER, Backend::ORT};
-  runtime_option = custom_option;
-  runtime_option.model_format = model_format;
-  runtime_option.model_file = model_file;
-  runtime_option.params_file = params_file;
-  initialized = Initialize();
-}
-
-bool Model::Initialize() {
-  if (!BuildPreprocessPipelineFromConfig()) {
-    FDERROR << "Failed to build preprocess pipeline from configuration file."
-            << std::endl;
-    return false;
-  }
-  if (!InitRuntime()) {
-    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
-    return false;
-  }
-  return true;
-}
-
-bool Model::BuildPreprocessPipelineFromConfig() {
-  processors_.clear();
-  YAML::Node cfg;
-  processors_.push_back(std::make_shared<BGR2RGB>());
-  try {
-    cfg = YAML::LoadFile(config_file_);
-  } catch (YAML::BadFile& e) {
-    FDERROR << "Failed to load yaml file " << config_file_
-            << ", maybe you should check this file." << std::endl;
-    return false;
-  }
-
-  if (cfg["Deploy"]["transforms"]) {
-    auto preprocess_cfg = cfg["Deploy"]["transforms"];
-    for (const auto& op : preprocess_cfg) {
-      FDASSERT(op.IsMap(),
-               "Require the transform information in yaml be Map type.");
-      if (op["type"].as<std::string>() == "Normalize") {
-        std::vector<float> mean = {0.5, 0.5, 0.5};
-        std::vector<float> std = {0.5, 0.5, 0.5};
-        if (op["mean"]) {
-          mean = op["mean"].as<std::vector<float>>();
-        }
-        if (op["std"]) {
-          std = op["std"].as<std::vector<float>>();
-        }
-        processors_.push_back(std::make_shared<Normalize>(mean, std));
-
-      } else if (op["type"].as<std::string>() == "Resize") {
-        const auto& target_size = op["target_size"];
-        int resize_width = target_size[0].as<int>();
-        int resize_height = target_size[1].as<int>();
-        is_resized = true;
-        processors_.push_back(
-            std::make_shared<Resize>(resize_width, resize_height));
-      }
-    }
-    processors_.push_back(std::make_shared<HWC2CHW>());
-  }
-  return true;
-}
-
-bool Model::Preprocess(Mat* mat, FDTensor* output,
-                       std::map<std::string, std::array<int, 2>>* im_info) {
-  for (size_t i = 0; i < processors_.size(); ++i) {
-    if (processors_[i]->Name().compare("Resize") == 0) {
-      auto processor = dynamic_cast<Resize*>(processors_[i].get());
-      int resize_width = -1;
-      int resize_height = -1;
-      std::tie(resize_width, resize_height) = processor->GetWidthAndHeight();
-      if (is_vertical_screen && (resize_width > resize_height)) {
-        if (processor->SetWidthAndHeight(resize_height, resize_width)) {
-          FDERROR << "Failed to set Resize processor width and height "
-                  << processors_[i]->Name() << "." << std::endl;
-        }
-      }
-    }
-    if (!(*(processors_[i].get()))(mat)) {
-      FDERROR << "Failed to process image data in " << processors_[i]->Name()
-              << "." << std::endl;
-      return false;
-    }
-  }
-
-  // Record output shape of preprocessed image
-  (*im_info)["output_shape"] = {static_cast<int>(mat->Height()),
-                                static_cast<int>(mat->Width())};
-
-  mat->ShareWithTensor(output);
-  output->shape.insert(output->shape.begin(), 1);
-  output->name = InputInfoOfRuntime(0).name;
-  return true;
-}
-
-bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result,
-                        std::map<std::string, std::array<int, 2>>* im_info) {
-  // PaddleSeg has three types of inference output:
-  //     1. output with argmax and without softmax. 3-D matrix CHW, Channel
-  //     always 1, the element in matrix is classified label_id INT64 Type.
-  //     2. output without argmax and without softmax. 4-D matrix NCHW, N always
-  //     1, Channel is the num of classes. The element is the logits of classes
-  //     FP32
-  //     3. output without argmax and with softmax. 4-D matrix NCHW, the result
-  //     of 2 with softmax layer
-  // Fastdeploy output:
-  //     1. label_map
-  //     2. score_map(optional)
-  //     3. shape: 2-D HW
-  FDASSERT(infer_result.dtype == FDDataType::INT64 ||
-               infer_result.dtype == FDDataType::FP32,
-           "Require the data type of output is int64 or fp32, but now it's " +
-               Str(infer_result.dtype) + ".");
-  result->Clear();
-
-  if (infer_result.shape.size() == 4) {
-    FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1.");
-    // output without argmax
-    result->contain_score_map = true;
-    utils::NCHW2NHWC<float_t>(infer_result);
-  }
-
-  // for resize mat below
-  FDTensor new_infer_result;
-  Mat* mat = nullptr;
-  if (is_resized) {
-    cv::Mat temp_mat;
-    utils::FDTensor2FP32CVMat(temp_mat, infer_result,
-                              result->contain_score_map);
-
-    // original image shape
-    auto iter_ipt = (*im_info).find("input_shape");
-    FDASSERT(iter_ipt != im_info->end(),
-             "Cannot find input_shape from im_info.");
-    int ipt_h = iter_ipt->second[0];
-    int ipt_w = iter_ipt->second[1];
-
-    mat = new Mat(temp_mat);
-
-    Resize::Run(mat, ipt_w, ipt_h, -1, -1, 1);
-    mat->ShareWithTensor(&new_infer_result);
-    new_infer_result.shape.insert(new_infer_result.shape.begin(), 1);
-    result->shape = new_infer_result.shape;
-  } else {
-    result->shape = infer_result.shape;
-  }
-  int out_num =
-      std::accumulate(result->shape.begin(), result->shape.begin() + 3, 1,
-                      std::multiplies<int>());
-  // NCHW remove N or CHW remove C
-  result->shape.erase(result->shape.begin());
-  result->Resize(out_num);
-  if (result->contain_score_map) {
-    // output with label_map and score_map
-    float_t* infer_result_buffer = nullptr;
-    if (is_resized) {
-      infer_result_buffer = static_cast<float_t*>(new_infer_result.Data());
-    } else {
-      infer_result_buffer = static_cast<float_t*>(infer_result.Data());
-    }
-    // argmax
-    utils::ArgmaxScoreMap(infer_result_buffer, result, with_softmax);
-    result->shape.erase(result->shape.begin() + 2);
-  } else {
-    // output only with label_map
-    if (is_resized) {
-      float_t* infer_result_buffer =
-          static_cast<float_t*>(new_infer_result.Data());
-      for (int i = 0; i < out_num; i++) {
-        result->label_map[i] = static_cast<uint8_t>(*(infer_result_buffer + i));
-      }
-    } else {
-      const int64_t* infer_result_buffer =
-          reinterpret_cast<const int64_t*>(infer_result.Data());
-      for (int i = 0; i < out_num; i++) {
-        result->label_map[i] = static_cast<uint8_t>(*(infer_result_buffer + i));
-      }
-    }
-  }
-  delete mat;
-  mat = nullptr;
-  return true;
-}
-
-bool Model::Predict(cv::Mat* im, SegmentationResult* result) {
-  Mat mat(*im);
-  std::vector<FDTensor> processed_data(1);
-
-  std::map<std::string, std::array<int, 2>> im_info;
-
-  // Record the shape of image and the shape of preprocessed image
-  im_info["input_shape"] = {static_cast<int>(mat.Height()),
-                            static_cast<int>(mat.Width())};
-  im_info["output_shape"] = {static_cast<int>(mat.Height()),
-                             static_cast<int>(mat.Width())};
-
-  if (!Preprocess(&mat, &(processed_data[0]), &im_info)) {
-    FDERROR << "Failed to preprocess input data while using model:"
-            << ModelName() << "." << std::endl;
-    return false;
-  }
-  std::vector<FDTensor> infer_result(1);
-  if (!Infer(processed_data, &infer_result)) {
-    FDERROR << "Failed to inference while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-  if (!Postprocess(infer_result[0], result, &im_info)) {
-    FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
-            << std::endl;
-    return false;
-  }
-  return true;
-}
-
-}  // namespace ppseg
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/ppseg/model.h b/csrcs/fastdeploy/vision/ppseg/model.h
deleted file mode 100644
index 72f8dbc64..000000000
--- a/csrcs/fastdeploy/vision/ppseg/model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#pragma once
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/vision/common/processors/transform.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace ppseg {
-
-class FASTDEPLOY_DECL Model : public FastDeployModel {
- public:
-  Model(const std::string& model_file, const std::string& params_file,
-        const std::string& config_file,
-        const RuntimeOption& custom_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::PADDLE);
-
-  std::string ModelName() const { return "ppseg"; }
-
-  virtual bool Predict(cv::Mat* im, SegmentationResult* result);
-
-  bool with_softmax = false;
-
-  bool is_vertical_screen = false;
-
- private:
-  bool Initialize();
-
-  bool BuildPreprocessPipelineFromConfig();
-
-  bool Preprocess(Mat* mat, FDTensor* outputs,
-                  std::map<std::string, std::array<int, 2>>* im_info);
-
-  bool Postprocess(FDTensor& infer_result, SegmentationResult* result,
-                   std::map<std::string, std::array<int, 2>>* im_info);
-
-  bool is_resized = false;
-
-  std::vector<std::shared_ptr<Processor>> processors_;
-  std::string config_file_;
-};
-}  // namespace ppseg
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc b/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc
deleted file mode 100644
index 949c27487..000000000
--- a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindPPSeg(pybind11::module& m) {
-  auto ppseg_module =
-      m.def_submodule("ppseg", "Module to deploy PaddleSegmentation.");
-  pybind11::class_<vision::ppseg::Model, FastDeployModel>(ppseg_module, "Model")
-      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
-                          Frontend>())
-      .def("predict",
-           [](vision::ppseg::Model& self, pybind11::array& data) {
-             auto mat = PyArrayToCvMat(data);
-             vision::SegmentationResult* res = new vision::SegmentationResult();
-             // self.Predict(&mat, &res);
-             self.Predict(&mat, res);
-             return res;
-           })
-      .def_readwrite("with_softmax", &vision::ppseg::Model::with_softmax)
-      .def_readwrite("is_vertical_screen",
-                     &vision::ppseg::Model::is_vertical_screen);
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc b/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc
deleted file mode 100644
index fdd110cb8..000000000
--- a/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace utils {
-
-void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result,
-                        bool contain_score_map) {
-  // output with argmax channel is 1
-  int channel = 1;
-  int height = infer_result.shape[1];
-  int width = infer_result.shape[2];
-
-  if (contain_score_map) {
-    // output without argmax and convent to NHWC
-    channel = infer_result.shape[3];
-  }
-  // create FP32 cvmat
-  if (infer_result.dtype == FDDataType::INT64) {
-    FDWARNING << "The PaddleSeg model is exported with argmax. Inference "
-                 "result type is " +
-                     Str(infer_result.dtype) +
-                     ". If you want the edge of segmentation image more "
-                     "smoother. Please export model with --without_argmax "
-                     "--with_softmax."
-              << std::endl;
-    int64_t chw = channel * height * width;
-    int64_t* infer_result_buffer = static_cast<int64_t*>(infer_result.Data());
-    std::vector<float_t> float_result_buffer(chw);
-    mat = cv::Mat(height, width, CV_32FC(channel));
-    int index = 0;
-    for (int i = 0; i < height; i++) {
-      for (int j = 0; j < width; j++) {
-        mat.at<float_t>(i, j) =
-            static_cast<float_t>(infer_result_buffer[index++]);
-      }
-    }
-  } else if (infer_result.dtype == FDDataType::FP32) {
-    mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data());
-  }
-}
-
-}  // namespace utils
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/cosine_similarity.cc b/csrcs/fastdeploy/vision/utils/cosine_similarity.cc
deleted file mode 100644
index 70d4e31dd..000000000
--- a/csrcs/fastdeploy/vision/utils/cosine_similarity.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace utils {
-
-float CosineSimilarity(const std::vector<float>& a, const std::vector<float>& b,
-                       bool normalized) {
-  // 计算余弦相似度
-  FDASSERT((a.size() == b.size()) && (a.size() != 0),
-           "The size of a and b must be equal and >= 1.");
-  size_t num_val = a.size();
-  if (normalized) {
-    float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f;
-    for (size_t i = 0; i < num_val; ++i) {
-      mul_a += (a[i] * a[i]);
-      mul_b += (b[i] * b[i]);
-      mul_ab += (a[i] * b[i]);
-    }
-    return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b)));
-  }
-  auto norm_a = L2Normalize(a);
-  auto norm_b = L2Normalize(b);
-  float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f;
-  for (size_t i = 0; i < num_val; ++i) {
-    mul_a += (norm_a[i] * norm_a[i]);
-    mul_b += (norm_b[i] * norm_b[i]);
-    mul_ab += (norm_a[i] * norm_b[i]);
-  }
-  return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b)));
-}
-
-}  // namespace utils
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/l2_normalize.cc b/csrcs/fastdeploy/vision/utils/l2_normalize.cc
deleted file mode 100644
index f5752b848..000000000
--- a/csrcs/fastdeploy/vision/utils/l2_normalize.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace utils {
-
-std::vector<float> L2Normalize(const std::vector<float>& values) {
-  size_t num_val = values.size();
-  if (num_val == 0) {
-    return {};
-  }
-  std::vector<float> norm;
-  float l2_sum_val = 0.f;
-  for (size_t i = 0; i < num_val; ++i) {
-    l2_sum_val += (values[i] * values[i]);
-  }
-  float l2_sum_sqrt = std::sqrt(l2_sum_val);
-  norm.resize(num_val);
-  for (size_t i = 0; i < num_val; ++i) {
-    norm[i] = values[i] / l2_sum_sqrt;
-  }
-  return norm;
-}
-
-}  // namespace utils
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/nms.cc b/csrcs/fastdeploy/vision/utils/nms.cc
deleted file mode 100644
index 900acf84d..000000000
--- a/csrcs/fastdeploy/vision/utils/nms.cc
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/utils/perf.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace utils {
-
-// The implementation refers to
-// https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/cpp/src/utils.cc
-void NMS(DetectionResult* result, float iou_threshold) {
-  utils::SortDetectionResult(result);
-
-  std::vector<float> area_of_boxes(result->boxes.size());
-  std::vector<int> suppressed(result->boxes.size(), 0);
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) *
-                       (result->boxes[i][3] - result->boxes[i][1]);
-  }
-
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    if (suppressed[i] == 1) {
-      continue;
-    }
-    for (size_t j = i + 1; j < result->boxes.size(); ++j) {
-      if (suppressed[j] == 1) {
-        continue;
-      }
-      float xmin = std::max(result->boxes[i][0], result->boxes[j][0]);
-      float ymin = std::max(result->boxes[i][1], result->boxes[j][1]);
-      float xmax = std::min(result->boxes[i][2], result->boxes[j][2]);
-      float ymax = std::min(result->boxes[i][3], result->boxes[j][3]);
-      float overlap_w = std::max(0.0f, xmax - xmin);
-      float overlap_h = std::max(0.0f, ymax - ymin);
-      float overlap_area = overlap_w * overlap_h;
-      float overlap_ratio =
-          overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area);
-      if (overlap_ratio > iou_threshold) {
-        suppressed[j] = 1;
-      }
-    }
-  }
-  DetectionResult backup(*result);
-  result->Clear();
-  result->Reserve(suppressed.size());
-  for (size_t i = 0; i < suppressed.size(); ++i) {
-    if (suppressed[i] == 1) {
-      continue;
-    }
-    result->boxes.emplace_back(backup.boxes[i]);
-    result->scores.push_back(backup.scores[i]);
-    result->label_ids.push_back(backup.label_ids[i]);
-  }
-}
-
-void NMS(FaceDetectionResult* result, float iou_threshold) {
-  utils::SortDetectionResult(result);
-
-  std::vector<float> area_of_boxes(result->boxes.size());
-  std::vector<int> suppressed(result->boxes.size(), 0);
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) *
-                       (result->boxes[i][3] - result->boxes[i][1]);
-  }
-
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    if (suppressed[i] == 1) {
-      continue;
-    }
-    for (size_t j = i + 1; j < result->boxes.size(); ++j) {
-      if (suppressed[j] == 1) {
-        continue;
-      }
-      float xmin = std::max(result->boxes[i][0], result->boxes[j][0]);
-      float ymin = std::max(result->boxes[i][1], result->boxes[j][1]);
-      float xmax = std::min(result->boxes[i][2], result->boxes[j][2]);
-      float ymax = std::min(result->boxes[i][3], result->boxes[j][3]);
-      float overlap_w = std::max(0.0f, xmax - xmin);
-      float overlap_h = std::max(0.0f, ymax - ymin);
-      float overlap_area = overlap_w * overlap_h;
-      float overlap_ratio =
-          overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area);
-      if (overlap_ratio > iou_threshold) {
-        suppressed[j] = 1;
-      }
-    }
-  }
-  FaceDetectionResult backup(*result);
-  int landmarks_per_face = result->landmarks_per_face;
-
-  result->Clear();
-  // don't forget to reset the landmarks_per_face
-  // before apply Reserve method.
-  result->landmarks_per_face = landmarks_per_face;
-  result->Reserve(suppressed.size());
-  for (size_t i = 0; i < suppressed.size(); ++i) {
-    if (suppressed[i] == 1) {
-      continue;
-    }
-    result->boxes.emplace_back(backup.boxes[i]);
-    result->scores.push_back(backup.scores[i]);
-    // landmarks (if have)
-    if (result->landmarks_per_face > 0) {
-      for (size_t j = 0; j < result->landmarks_per_face; ++j) {
-        result->landmarks.emplace_back(
-            backup.landmarks[i * result->landmarks_per_face + j]);
-      }
-    }
-  }
-}
-
-}  // namespace utils
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/sort_det_res.cc b/csrcs/fastdeploy/vision/utils/sort_det_res.cc
deleted file mode 100644
index 93dbb6969..000000000
--- a/csrcs/fastdeploy/vision/utils/sort_det_res.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace utils {
-
-void Merge(DetectionResult* result, size_t low, size_t mid, size_t high) {
-  std::vector<std::array<float, 4>>& boxes = result->boxes;
-  std::vector<float>& scores = result->scores;
-  std::vector<int32_t>& label_ids = result->label_ids;
-  std::vector<std::array<float, 4>> temp_boxes(boxes);
-  std::vector<float> temp_scores(scores);
-  std::vector<int32_t> temp_label_ids(label_ids);
-  size_t i = low;
-  size_t j = mid + 1;
-  size_t k = i;
-  for (; i <= mid && j <= high; k++) {
-    if (temp_scores[i] >= temp_scores[j]) {
-      scores[k] = temp_scores[i];
-      label_ids[k] = temp_label_ids[i];
-      boxes[k] = temp_boxes[i];
-      i++;
-    } else {
-      scores[k] = temp_scores[j];
-      label_ids[k] = temp_label_ids[j];
-      boxes[k] = temp_boxes[j];
-      j++;
-    }
-  }
-  while (i <= mid) {
-    scores[k] = temp_scores[i];
-    label_ids[k] = temp_label_ids[i];
-    boxes[k] = temp_boxes[i];
-    k++;
-    i++;
-  }
-  while (j <= high) {
-    scores[k] = temp_scores[j];
-    label_ids[k] = temp_label_ids[j];
-    boxes[k] = temp_boxes[j];
-    k++;
-    j++;
-  }
-}
-
-void MergeSort(DetectionResult* result, size_t low, size_t high) {
-  if (low < high) {
-    size_t mid = (high - low) / 2 + low;
-    MergeSort(result, low, mid);
-    MergeSort(result, mid + 1, high);
-    Merge(result, low, mid, high);
-  }
-}
-
-void SortDetectionResult(DetectionResult* result) {
-  size_t low = 0;
-  size_t high = result->scores.size();
-  if (high == 0) {
-      return;
-  }
-  high = high - 1;
-  MergeSort(result, low, high);
-}
-
-}  // namespace utils
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/sort_face_det_res.cc b/csrcs/fastdeploy/vision/utils/sort_face_det_res.cc
deleted file mode 100644
index 34150f9ac..000000000
--- a/csrcs/fastdeploy/vision/utils/sort_face_det_res.cc
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision/utils/utils.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace utils {
-
-void SortDetectionResult(FaceDetectionResult* result) {
-  // sort face detection results with landmarks or not.
-  if (result->boxes.size() == 0) {
-    return;
-  }
-  int landmarks_per_face = result->landmarks_per_face;
-  if (landmarks_per_face > 0) {
-    FDASSERT(
-        (result->landmarks.size() == result->boxes.size() * landmarks_per_face),
-        "The size of landmarks != boxes.size * landmarks_per_face.");
-  }
-
-  // argsort for scores.
-  std::vector<size_t> indices;
-  indices.resize(result->boxes.size());
-  for (size_t i = 0; i < result->boxes.size(); ++i) {
-    indices[i] = i;
-  }
-  std::vector<float>& scores = result->scores;
-  std::sort(indices.begin(), indices.end(),
-            [&scores](size_t a, size_t b) { return scores[a] > scores[b]; });
-
-  // reorder boxes, scores, landmarks (if have).
-  FaceDetectionResult backup(*result);
-  result->Clear();
-  // don't forget to reset the landmarks_per_face
-  // before apply Reserve method.
-  result->landmarks_per_face = landmarks_per_face;
-  result->Reserve(indices.size());
-  if (landmarks_per_face > 0) {
-    for (size_t i = 0; i < indices.size(); ++i) {
-      result->boxes.emplace_back(backup.boxes[indices[i]]);
-      result->scores.push_back(backup.scores[indices[i]]);
-      for (size_t j = 0; j < landmarks_per_face; ++j) {
-        result->landmarks.emplace_back(
-            backup.landmarks[indices[i] * landmarks_per_face + j]);
-      }
-    }
-  } else {
-    for (size_t i = 0; i < indices.size(); ++i) {
-      result->boxes.emplace_back(backup.boxes[indices[i]]);
-      result->scores.push_back(backup.scores[indices[i]]);
-    }
-  }
-}
-
-}  // namespace utils
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/utils/utils.h b/csrcs/fastdeploy/vision/utils/utils.h
deleted file mode 100644
index 02cf16e9c..000000000
--- a/csrcs/fastdeploy/vision/utils/utils.h
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <set>
-#include <vector>
-#include "fastdeploy/core/fd_tensor.h"
-#include "fastdeploy/utils/utils.h"
-#include "fastdeploy/vision/common/result.h"
-
-namespace fastdeploy {
-namespace vision {
-namespace utils {
-// topk sometimes is a very small value
-// so this implementation is simple but I don't think it will
-// cost too much time
-// Also there may be cause problem since we suppose the minimum value is
-// -99999999
-// Do not use this function on array which topk contains value less than
-// -99999999
-template <typename T>
-std::vector<int32_t> TopKIndices(const T* array, int array_size, int topk) {
-  topk = std::min(array_size, topk);
-  std::vector<int32_t> res(topk);
-  std::set<int32_t> searched;
-  for (int32_t i = 0; i < topk; ++i) {
-    T min = -99999999;
-    for (int32_t j = 0; j < array_size; ++j) {
-      if (searched.find(j) != searched.end()) {
-        continue;
-      }
-      if (*(array + j) > min) {
-        res[i] = j;
-        min = *(array + j);
-      }
-    }
-    searched.insert(res[i]);
-  }
-  return res;
-}
-
-template <typename T>
-void ArgmaxScoreMap(T infer_result_buffer, SegmentationResult* result,
-                    bool with_softmax) {
-  int64_t height = result->shape[0];
-  int64_t width = result->shape[1];
-  int64_t num_classes = result->shape[2];
-  int index = 0;
-  for (size_t i = 0; i < height; ++i) {
-    for (size_t j = 0; j < width; ++j) {
-      int64_t s = (i * width + j) * num_classes;
-      T max_class_score = std::max_element(
-          infer_result_buffer + s, infer_result_buffer + s + num_classes);
-      int label_id = std::distance(infer_result_buffer + s, max_class_score);
-      if (label_id >= 255) {
-        FDWARNING << "label_id is stored by uint8_t, now the value is bigger "
-                     "than 255, it's "
-                  << static_cast<int>(label_id) << "." << std::endl;
-      }
-      result->label_map[index] = static_cast<uint8_t>(label_id);
-
-      if (with_softmax) {
-        double_t total = 0;
-        for (int k = 0; k < num_classes; k++) {
-          total += exp(*(infer_result_buffer + s + k) - *max_class_score);
-        }
-        double_t softmax_class_score = 1 / total;
-        result->score_map[index] = static_cast<float>(softmax_class_score);
-
-      } else {
-        result->score_map[index] = static_cast<float>(*max_class_score);
-      }
-      index++;
-    }
-  }
-}
-
-template <typename T>
-void NCHW2NHWC(FDTensor& infer_result) {
-  T* infer_result_buffer = reinterpret_cast<T*>(infer_result.MutableData());
-  int num = infer_result.shape[0];
-  int channel = infer_result.shape[1];
-  int height = infer_result.shape[2];
-  int width = infer_result.shape[3];
-  int chw = channel * height * width;
-  int wc = width * channel;
-  int wh = width * height;
-  std::vector<T> hwc_data(chw);
-  int index = 0;
-  for (int n = 0; n < num; n++) {
-    for (int c = 0; c < channel; c++) {
-      for (int h = 0; h < height; h++) {
-        for (int w = 0; w < width; w++) {
-          hwc_data[n * chw + h * wc + w * channel + c] =
-              *(infer_result_buffer + index);
-          index++;
-        }
-      }
-    }
-  }
-  std::memcpy(infer_result.MutableData(), hwc_data.data(),
-              num * chw * sizeof(T));
-  infer_result.shape = {num, height, width, channel};
-}
-
-void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result,
-                        bool contain_score_map);
-
-void NMS(DetectionResult* output, float iou_threshold = 0.5);
-
-void NMS(FaceDetectionResult* result, float iou_threshold = 0.5);
-
-// MergeSort
-void SortDetectionResult(DetectionResult* output);
-
-void SortDetectionResult(FaceDetectionResult* result);
-
-// L2 Norm / cosine similarity  (for face recognition, ...)
-FASTDEPLOY_DECL std::vector<float> L2Normalize(
-    const std::vector<float>& values);
-
-FASTDEPLOY_DECL float CosineSimilarity(const std::vector<float>& a,
-                                       const std::vector<float>& b,
-                                       bool normalized = true);
-
-}  // namespace utils
-}  // namespace vision
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/vision_pybind.cc b/csrcs/fastdeploy/vision/vision_pybind.cc
deleted file mode 100644
index 6528dd22b..000000000
--- a/csrcs/fastdeploy/vision/vision_pybind.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-
-void BindPPCls(pybind11::module& m);
-void BindPPDet(pybind11::module& m);
-void BindPPSeg(pybind11::module& m);
-
-void BindDetection(pybind11::module& m);
-void BindMatting(pybind11::module& m);
-void BindFaceDet(pybind11::module& m);
-void BindFaceId(pybind11::module& m);
-#ifdef ENABLE_VISION_VISUALIZE
-void BindVisualize(pybind11::module& m);
-#endif
-
-void BindVision(pybind11::module& m) {
-  pybind11::class_<vision::ClassifyResult>(m, "ClassifyResult")
-      .def(pybind11::init())
-      .def_readwrite("label_ids", &vision::ClassifyResult::label_ids)
-      .def_readwrite("scores", &vision::ClassifyResult::scores)
-      .def("__repr__", &vision::ClassifyResult::Str)
-      .def("__str__", &vision::ClassifyResult::Str);
-
-  pybind11::class_<vision::DetectionResult>(m, "DetectionResult")
-      .def(pybind11::init())
-      .def_readwrite("boxes", &vision::DetectionResult::boxes)
-      .def_readwrite("scores", &vision::DetectionResult::scores)
-      .def_readwrite("label_ids", &vision::DetectionResult::label_ids)
-      .def("__repr__", &vision::DetectionResult::Str)
-      .def("__str__", &vision::DetectionResult::Str);
-
-  pybind11::class_<vision::FaceDetectionResult>(m, "FaceDetectionResult")
-      .def(pybind11::init())
-      .def_readwrite("boxes", &vision::FaceDetectionResult::boxes)
-      .def_readwrite("scores", &vision::FaceDetectionResult::scores)
-      .def_readwrite("landmarks", &vision::FaceDetectionResult::landmarks)
-      .def_readwrite("landmarks_per_face",
-                     &vision::FaceDetectionResult::landmarks_per_face)
-      .def("__repr__", &vision::FaceDetectionResult::Str)
-      .def("__str__", &vision::FaceDetectionResult::Str);
-
-  pybind11::class_<vision::SegmentationResult>(m, "SegmentationResult")
-      .def(pybind11::init())
-      .def_readwrite("label_map", &vision::SegmentationResult::label_map)
-      .def_readwrite("score_map", &vision::SegmentationResult::score_map)
-      .def_readwrite("shape", &vision::SegmentationResult::shape)
-      .def_readwrite("shape", &vision::SegmentationResult::shape)
-      .def("__repr__", &vision::SegmentationResult::Str)
-      .def("__str__", &vision::SegmentationResult::Str);
-
-  pybind11::class_<vision::FaceRecognitionResult>(m, "FaceRecognitionResult")
-      .def(pybind11::init())
-      .def_readwrite("embedding", &vision::FaceRecognitionResult::embedding)
-      .def("__repr__", &vision::FaceRecognitionResult::Str)
-      .def("__str__", &vision::FaceRecognitionResult::Str);
-
-  pybind11::class_<vision::MattingResult>(m, "MattingResult")
-      .def(pybind11::init())
-      .def_readwrite("alpha", &vision::MattingResult::alpha)
-      .def_readwrite("foreground", &vision::MattingResult::foreground)
-      .def_readwrite("shape", &vision::MattingResult::shape)
-      .def_readwrite("contain_foreground", &vision::MattingResult::shape)
-      .def("__repr__", &vision::MattingResult::Str)
-      .def("__str__", &vision::MattingResult::Str);
-
-  BindPPCls(m);
-  BindPPDet(m);
-  BindPPSeg(m);
-
-  BindDetection(m);
-  BindFaceDet(m);
-  BindFaceId(m);
-  BindMatting(m);
-#ifdef ENABLE_VISION_VISUALIZE
-  BindVisualize(m);
-#endif
-}
-}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/visualize/detection.cc b/csrcs/fastdeploy/vision/visualize/detection.cc
deleted file mode 100644
index 147ef6556..000000000
--- a/csrcs/fastdeploy/vision/visualize/detection.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-
-#include "fastdeploy/vision/visualize/visualize.h"
-#include "opencv2/imgproc/imgproc.hpp"
-
-namespace fastdeploy {
-namespace vision {
-
-// Default only support visualize num_classes <= 1000
-// If need to visualize num_classes > 1000
-// Please call Visualize::GetColorMap(num_classes) first
-cv::Mat Visualize::VisDetection(const cv::Mat& im,
-                                const DetectionResult& result, int line_size,
-                                float font_size) {
-  auto color_map = GetColorMap();
-  int h = im.rows;
-  int w = im.cols;
-  auto vis_im = im.clone();
-  for (size_t i = 0; i < result.boxes.size(); ++i) {
-    cv::Rect rect(result.boxes[i][0], result.boxes[i][1],
-                  result.boxes[i][2] - result.boxes[i][0],
-                  result.boxes[i][3] - result.boxes[i][1]);
-    int c0 = color_map[3 * result.label_ids[i] + 0];
-    int c1 = color_map[3 * result.label_ids[i] + 1];
-    int c2 = color_map[3 * result.label_ids[i] + 2];
-    cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
-    std::string id = std::to_string(result.label_ids[i]);
-    std::string score = std::to_string(result.scores[i]);
-    if (score.size() > 4) {
-      score = score.substr(0, 4);
-    }
-    std::string text = id + "," + score;
-    int font = cv::FONT_HERSHEY_SIMPLEX;
-    cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr);
-    cv::Point origin;
-    origin.x = rect.x;
-    origin.y = rect.y;
-    cv::Rect text_background =
-        cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height,
-                 text_size.width, text_size.height);
-    cv::rectangle(vis_im, rect, rect_color, line_size);
-    cv::putText(vis_im, text, origin, font, font_size,
-                cv::Scalar(255, 255, 255), 1);
-  }
-  return vis_im;
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
-#endif
diff --git a/csrcs/fastdeploy/vision/visualize/face_detection.cc b/csrcs/fastdeploy/vision/visualize/face_detection.cc
deleted file mode 100644
index d9da27786..000000000
--- a/csrcs/fastdeploy/vision/visualize/face_detection.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-
-#include "fastdeploy/vision/visualize/visualize.h"
-#include "opencv2/imgproc/imgproc.hpp"
-
-namespace fastdeploy {
-
-namespace vision {
-
-// Default only support visualize num_classes <= 1000
-// If need to visualize num_classes > 1000
-// Please call Visualize::GetColorMap(num_classes) first
-cv::Mat Visualize::VisFaceDetection(const cv::Mat& im,
-                                    const FaceDetectionResult& result,
-                                    int line_size, float font_size) {
-  auto color_map = GetColorMap();
-  int h = im.rows;
-  int w = im.cols;
-
-  auto vis_im = im.clone();
-  bool vis_landmarks = false;
-  if ((result.landmarks_per_face > 0) &&
-      (result.boxes.size() * result.landmarks_per_face ==
-       result.landmarks.size())) {
-    vis_landmarks = true;
-  }
-  for (size_t i = 0; i < result.boxes.size(); ++i) {
-    cv::Rect rect(result.boxes[i][0], result.boxes[i][1],
-                  result.boxes[i][2] - result.boxes[i][0],
-                  result.boxes[i][3] - result.boxes[i][1]);
-    int color_id = i % 333;
-    int c0 = color_map[3 * color_id + 0];
-    int c1 = color_map[3 * color_id + 1];
-    int c2 = color_map[3 * color_id + 2];
-    cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
-    std::string text = std::to_string(result.scores[i]);
-    if (text.size() > 4) {
-      text = text.substr(0, 4);
-    }
-    int font = cv::FONT_HERSHEY_SIMPLEX;
-    cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr);
-    cv::Point origin;
-    origin.x = rect.x;
-    origin.y = rect.y;
-    cv::Rect text_background =
-        cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height,
-                 text_size.width, text_size.height);
-    cv::rectangle(vis_im, rect, rect_color, line_size);
-    cv::putText(vis_im, text, origin, font, font_size,
-                cv::Scalar(255, 255, 255), 1);
-    // vis landmarks (if have)
-    if (vis_landmarks) {
-      cv::Scalar landmark_color = rect_color;
-      for (size_t j = 0; j < result.landmarks_per_face; ++j) {
-        cv::Point landmark;
-        landmark.x = static_cast<int>(
-            result.landmarks[i * result.landmarks_per_face + j][0]);
-        landmark.y = static_cast<int>(
-            result.landmarks[i * result.landmarks_per_face + j][1]);
-        cv::circle(vis_im, landmark, line_size, landmark_color, -1);
-      }
-    }
-  }
-  return vis_im;
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
-
-#endif
diff --git a/csrcs/fastdeploy/vision/visualize/matting_alpha.cc b/csrcs/fastdeploy/vision/visualize/matting_alpha.cc
deleted file mode 100644
index 1018018c6..000000000
--- a/csrcs/fastdeploy/vision/visualize/matting_alpha.cc
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-
-#include "fastdeploy/vision/visualize/visualize.h"
-#include "opencv2/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-
-namespace fastdeploy {
-namespace vision {
-
-static void RemoveSmallConnectedArea(cv::Mat* alpha_pred,
-                                     float threshold = 0.05f) {
-  // 移除小的联通区域和噪点 开闭合形态学处理
-  // 假设输入的是透明度alpha, 值域(0.,1.)
-  cv::Mat gray, binary;
-  (*alpha_pred).convertTo(gray, CV_8UC1, 255.f);
-  // 255 * 0.05 ~ 13
-  unsigned int binary_threshold = static_cast<unsigned int>(255.f * threshold);
-  cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY);
-  // morphologyEx with OPEN operation to remove noise first.
-  auto kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3),
-                                          cv::Point(-1, -1));
-  cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel);
-  // Computationally connected domain
-  cv::Mat labels = cv::Mat::zeros((*alpha_pred).size(), CV_32S);
-  cv::Mat stats, centroids;
-  int num_labels =
-      cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4);
-  if (num_labels <= 1) {
-    // no noise, skip.
-    return;
-  }
-  // find max connected area, 0 is background
-  int max_connected_id = 1;  // 1,2,...
-  int max_connected_area = stats.at<int>(max_connected_id, cv::CC_STAT_AREA);
-  for (int i = 1; i < num_labels; ++i) {
-    int tmp_connected_area = stats.at<int>(i, cv::CC_STAT_AREA);
-    if (tmp_connected_area > max_connected_area) {
-      max_connected_area = tmp_connected_area;
-      max_connected_id = i;
-    }
-  }
-  const int h = (*alpha_pred).rows;
-  const int w = (*alpha_pred).cols;
-  // remove small connected area.
-  for (int i = 0; i < h; ++i) {
-    int* label_row_ptr = labels.ptr<int>(i);
-    float* alpha_row_ptr = (*alpha_pred).ptr<float>(i);
-    for (int j = 0; j < w; ++j) {
-      if (label_row_ptr[j] != max_connected_id) alpha_row_ptr[j] = 0.f;
-    }
-  }
-}
-
-cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im,
-                                   const MattingResult& result,
-                                   bool remove_small_connected_area) {
-  // 只可视化alpha，fgr(前景)本身就是一张图 不需要可视化
-  FDASSERT((!im.empty()), "im can't be empty!");
-  FDASSERT((im.channels() == 3), "Only support 3 channels mat!");
-
-  auto vis_img = im.clone();
-  int out_h = static_cast<int>(result.shape[0]);
-  int out_w = static_cast<int>(result.shape[1]);
-  int height = im.rows;
-  int width = im.cols;
-  // alpha to cv::Mat && 避免resize等操作修改外部数据
-  std::vector<float> alpha_copy;
-  alpha_copy.assign(result.alpha.begin(), result.alpha.end());
-  float* alpha_ptr = static_cast<float*>(alpha_copy.data());
-  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
-  if (remove_small_connected_area) {
-    RemoveSmallConnectedArea(&alpha, 0.05f);
-  }
-  if ((out_h != height) || (out_w != width)) {
-    cv::resize(alpha, alpha, cv::Size(width, height));
-  }
-
-  if ((vis_img).type() != CV_8UC3) {
-    (vis_img).convertTo((vis_img), CV_8UC3);
-  }
-
-  uchar* vis_data = static_cast<uchar*>(vis_img.data);
-  uchar* im_data = static_cast<uchar*>(im.data);
-  float* alpha_data = reinterpret_cast<float*>(alpha.data);
-
-  for (size_t i = 0; i < height; ++i) {
-    for (size_t j = 0; j < width; ++j) {
-      float alpha_val = alpha_data[i * width + j];
-      vis_data[i * width * 3 + j * 3 + 0] = cv::saturate_cast<uchar>(
-          static_cast<float>(im_data[i * width * 3 + j * 3 + 0]) * alpha_val +
-          (1.f - alpha_val) * 153.f);
-      vis_data[i * width * 3 + j * 3 + 1] = cv::saturate_cast<uchar>(
-          static_cast<float>(im_data[i * width * 3 + j * 3 + 1]) * alpha_val +
-          (1.f - alpha_val) * 255.f);
-      vis_data[i * width * 3 + j * 3 + 2] = cv::saturate_cast<uchar>(
-          static_cast<float>(im_data[i * width * 3 + j * 3 + 2]) * alpha_val +
-          (1.f - alpha_val) * 120.f);
-    }
-  }
-  return vis_img;
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
-#endif
diff --git a/csrcs/fastdeploy/vision/visualize/segmentation.cc b/csrcs/fastdeploy/vision/visualize/segmentation.cc
deleted file mode 100644
index 7d3790328..000000000
--- a/csrcs/fastdeploy/vision/visualize/segmentation.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-
-#include "fastdeploy/vision/visualize/visualize.h"
-#include "opencv2/highgui.hpp"
-#include "opencv2/imgproc/imgproc.hpp"
-
-namespace fastdeploy {
-namespace vision {
-
-cv::Mat Visualize::VisSegmentation(const cv::Mat& im,
-                                   const SegmentationResult& result) {
-  auto color_map = GetColorMap();
-  int64_t height = result.shape[0];
-  int64_t width = result.shape[1];
-  auto vis_img = cv::Mat(height, width, CV_8UC3);
-
-  int64_t index = 0;
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      int category_id = result.label_map[index++];
-      vis_img.at<cv::Vec3b>(i, j)[0] = color_map[3 * category_id + 0];
-      vis_img.at<cv::Vec3b>(i, j)[1] = color_map[3 * category_id + 1];
-      vis_img.at<cv::Vec3b>(i, j)[2] = color_map[3 * category_id + 2];
-    }
-  }
-  cv::addWeighted(im, .5, vis_img, .5, 0, vis_img);
-  return vis_img;
-}
-
-}  // namespace vision
-}  // namespace fastdeploy
-#endif
diff --git a/csrcs/fastdeploy/vision/visualize/visualize.cc b/csrcs/fastdeploy/vision/visualize/visualize.cc
deleted file mode 100644
index 4ad6ba124..000000000
--- a/csrcs/fastdeploy/vision/visualize/visualize.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-#include "fastdeploy/vision/visualize/visualize.h"
-
-namespace fastdeploy {
-namespace vision {
-
-int Visualize::num_classes_ = 0;
-std::vector<int> Visualize::color_map_ = std::vector<int>();
-
-const std::vector<int>& Visualize::GetColorMap(int num_classes) {
-  if (num_classes < num_classes_) {
-    return color_map_;
-  }
-  num_classes_ = num_classes;
-  std::vector<int>().swap(color_map_);
-  color_map_.resize(3 * num_classes_, 0);
-  for (int i = 0; i < num_classes_; ++i) {
-    int j = 0;
-    int lab = i;
-    while (lab) {
-      color_map_[i * 3] |= (((lab >> 0) & 1) << (7 - j));
-      color_map_[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
-      color_map_[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
-      ++j;
-      lab >>= 3;
-    }
-  }
-  return color_map_;
-}
-
-} // namespace vision
-} // namespace fastdeploy
-#endif
diff --git a/csrcs/fastdeploy/vision/visualize/visualize.h b/csrcs/fastdeploy/vision/visualize/visualize.h
deleted file mode 100644
index bee62c301..000000000
--- a/csrcs/fastdeploy/vision/visualize/visualize.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifdef ENABLE_VISION_VISUALIZE
-#pragma once
-
-#include "fastdeploy/vision/common/result.h"
-#include "opencv2/imgproc/imgproc.hpp"
-namespace fastdeploy {
-namespace vision {
-
-class FASTDEPLOY_DECL Visualize {
- public:
-  static int num_classes_;
-  static std::vector<int> color_map_;
-  static const std::vector<int>& GetColorMap(int num_classes = 1000);
-  static cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
-                              int line_size = 1, float font_size = 0.5f);
-  static cv::Mat VisFaceDetection(const cv::Mat& im,
-                                  const FaceDetectionResult& result,
-                                  int line_size = 1, float font_size = 0.5f);
-  static cv::Mat VisSegmentation(const cv::Mat& im,
-                                 const SegmentationResult& result);
-  static cv::Mat VisMattingAlpha(const cv::Mat& im, const MattingResult& result,
-                                 bool remove_small_connected_area = false);
-};
-
-}  // namespace vision
-}  // namespace fastdeploy
-#endif
diff --git a/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc b/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc
deleted file mode 100644
index 36010acf1..000000000
--- a/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/pybind/main.h"
-
-namespace fastdeploy {
-void BindVisualize(pybind11::module& m) {
-  pybind11::class_<vision::Visualize>(m, "Visualize")
-      .def(pybind11::init<>())
-      .def_static("vis_detection",
-                  [](pybind11::array& im_data, vision::DetectionResult& result,
-                     int line_size, float font_size) {
-                    auto im = PyArrayToCvMat(im_data);
-                    auto vis_im = vision::Visualize::VisDetection(
-                        im, result, line_size, font_size);
-                    FDTensor out;
-                    vision::Mat(vis_im).ShareWithTensor(&out);
-                    return TensorToPyArray(out);
-                  })
-      .def_static(
-          "vis_face_detection",
-          [](pybind11::array& im_data, vision::FaceDetectionResult& result,
-             int line_size, float font_size) {
-            auto im = PyArrayToCvMat(im_data);
-            auto vis_im = vision::Visualize::VisFaceDetection(
-                im, result, line_size, font_size);
-            FDTensor out;
-            vision::Mat(vis_im).ShareWithTensor(&out);
-            return TensorToPyArray(out);
-          })
-      .def_static(
-          "vis_segmentation",
-          [](pybind11::array& im_data, vision::SegmentationResult& result) {
-            cv::Mat im = PyArrayToCvMat(im_data);
-            auto vis_im = vision::Visualize::VisSegmentation(im, result);
-            FDTensor out;
-            vision::Mat(vis_im).ShareWithTensor(&out);
-            return TensorToPyArray(out);
-          })
-      .def_static("vis_matting_alpha",
-                  [](pybind11::array& im_data, vision::MattingResult& result,
-                     bool remove_small_connected_area) {
-                    cv::Mat im = PyArrayToCvMat(im_data);
-                    auto vis_im = vision::Visualize::VisMattingAlpha(
-                        im, result, remove_small_connected_area);
-                    FDTensor out;
-                    vision::Mat(vis_im).ShareWithTensor(&out);
-                    return TensorToPyArray(out);
-                  });
-}
-}  // namespace fastdeploy
diff --git a/examples/.gitignore b/examples/.gitignore
deleted file mode 100644
index 0c684c6ae..000000000
--- a/examples/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-*.jpg
-*.png
-*.jpeg
-*.onnx
-*.engine
-*.pd*
-*.nb
-bin
\ No newline at end of file
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
deleted file mode 100644
index 770bf44da..000000000
--- a/examples/CMakeLists.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-function(add_fastdeploy_executable FIELD CC_FILE)
-  # temp target name/file var in function scope
-  set(TEMP_TARGET_FILE ${CC_FILE})
-  string(REGEX MATCHALL "[0-9A-Za-z_]*.cc" FILE_NAME ${CC_FILE})
-  string(REGEX REPLACE ".cc" "" FILE_PREFIX ${FILE_NAME})
-  set(TEMP_TARGET_NAME ${FIELD}_${FILE_PREFIX})
-  if (EXISTS ${TEMP_TARGET_FILE} AND TARGET fastdeploy)
-    add_executable(${TEMP_TARGET_NAME} ${TEMP_TARGET_FILE})
-    target_link_libraries(${TEMP_TARGET_NAME} PUBLIC fastdeploy)
-    message(STATUS "  Added FastDeploy Executable       : ${TEMP_TARGET_NAME}")
-  endif()
-  unset(TEMP_TARGET_FILE)
-  unset(TEMP_TARGET_NAME)
-endfunction()
-
-# vision examples
-if(WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/vision)
-  message(STATUS "")
-  message(STATUS "*************FastDeploy Examples Summary**********")
-  file(GLOB ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/vision/*.cc)
-  foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS})
-    add_fastdeploy_executable(vision ${_CC_FILE})
-  endforeach()
-endif()
-
-# text examples
-if(WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/text)
-  message(STATUS "")
-  message(STATUS "*************FastDeploy Examples Summary**********")
-  file(GLOB ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/text/*.cc)
-  foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS})
-    add_fastdeploy_executable(text ${_CC_FILE})
-  endforeach()
-endif()
-
-# other examples ...
diff --git a/examples/resources/.gitignore b/examples/resources/.gitignore
deleted file mode 100644
index aadf70252..000000000
--- a/examples/resources/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-images/*.jpg
-images/*.jpeg
-images/*.png
-models/*.onnx
-models/*.pd*
-models/*.engine
-models/*.trt
-models/*.nb
-models/*param*
-models/*model*
-outputs/*.jpg
-outputs/*.jpeg
-outputs/*.png
-outputs/*.txt
-outputs/*.json
\ No newline at end of file
diff --git a/examples/resources/images/.gitignore b/examples/resources/images/.gitignore
deleted file mode 100644
index a025c1b2f..000000000
--- a/examples/resources/images/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.jpg
-*.jpeg
-*.png
\ No newline at end of file
diff --git a/examples/resources/models/.gitignore b/examples/resources/models/.gitignore
deleted file mode 100644
index 8a3992492..000000000
--- a/examples/resources/models/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*.onnx
-*.engine
-*.pd*
-*.nb
-*.trt
\ No newline at end of file
diff --git a/examples/resources/outputs/.gitignore b/examples/resources/outputs/.gitignore
deleted file mode 100644
index b90600fbe..000000000
--- a/examples/resources/outputs/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.jpg
-*.png
-*.jpeg
\ No newline at end of file
diff --git a/examples/text/ernie_tokencls.cc b/examples/text/ernie_tokencls.cc
deleted file mode 100644
index 1f04bbb66..000000000
--- a/examples/text/ernie_tokencls.cc
+++ /dev/null
@@ -1,225 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <iostream>
-#include <sstream>
-
-#include "fastdeploy/text.h"
-#include "tokenizers/ernie_faster_tokenizer.h"
-
-using namespace paddlenlp;
-
-void LoadTransitionFromFile(const std::string& file,
-                            std::vector<float>* transitions, int* num_tags) {
-  std::ifstream fin(file);
-  std::string curr_transition;
-  float transition;
-  int i = 0;
-  while (fin) {
-    std::getline(fin, curr_transition);
-    std::istringstream iss(curr_transition);
-    while (iss) {
-      iss >> transition;
-      transitions->push_back(transition);
-    }
-    if (curr_transition != "") {
-      ++i;
-    }
-  }
-  *num_tags = i;
-}
-
-// Only useful for axis = -1
-template <typename T>
-void Softmax(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) {
-  auto softmax_func = [](const T* score_vec, T* softmax_vec, int label_num) {
-    double score_max = *(std::max_element(score_vec, score_vec + label_num));
-    double e_sum = 0;
-    for (int j = 0; j < label_num; j++) {
-      softmax_vec[j] = std::exp(score_vec[j] - score_max);
-      e_sum += softmax_vec[j];
-    }
-    for (int k = 0; k < label_num; k++) {
-      softmax_vec[k] /= e_sum;
-    }
-  };
-
-  output->Allocate(input.shape, input.dtype);
-  int label_num = output->shape.back();
-  int batch_size = input.Numel() / label_num;
-  int offset = 0;
-  const T* input_ptr = reinterpret_cast<const T*>(input.Data());
-  T* output_ptr = reinterpret_cast<T*>(output->Data());
-  for (int i = 0; i < batch_size; ++i) {
-    softmax_func(input_ptr + offset, output_ptr + offset, label_num);
-    offset += label_num;
-  }
-}
-
-// Only useful for axis = -1
-template <typename T>
-void Max(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) {
-  auto output_shape = input.shape;
-  output_shape.back() = 1;
-  output->Allocate(output_shape, input.dtype);
-  int batch_size = output->Numel();
-  int label_num = input.shape.back();
-  int offset = 0;
-  const T* input_ptr = reinterpret_cast<const T*>(input.Data());
-  T* output_ptr = reinterpret_cast<T*>(output->Data());
-  for (int i = 0; i < batch_size; ++i) {
-    output_ptr[i] =
-        *(std::max_element(input_ptr + offset, input_ptr + offset + label_num));
-    offset += label_num;
-  }
-}
-
-template <typename T>
-void ViterbiDecode(const fastdeploy::FDTensor& slot_logits,
-                   const fastdeploy::FDTensor& trans,
-                   fastdeploy::FDTensor* best_path) {
-  int batch_size = slot_logits.shape[0];
-  int seq_len = slot_logits.shape[1];
-  int num_tags = slot_logits.shape[2];
-  best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64);
-
-  const T* slot_logits_ptr = reinterpret_cast<const T*>(slot_logits.Data());
-  const T* trans_ptr = reinterpret_cast<const T*>(trans.Data());
-  int64_t* best_path_ptr = reinterpret_cast<int64_t*>(best_path->Data());
-  std::vector<T> scores(num_tags);
-  std::copy(slot_logits_ptr, slot_logits_ptr + num_tags, scores.begin());
-  std::vector<std::vector<T>> M(num_tags, std::vector<T>(num_tags));
-  for (int b = 0; b < batch_size; ++b) {
-    std::vector<std::vector<int>> paths;
-    const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags;
-    int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len;
-    for (int t = 1; t < seq_len; t++) {
-      for (size_t i = 0; i < num_tags; i++) {
-        for (size_t j = 0; j < num_tags; j++) {
-          auto trans_idx = i * num_tags * num_tags + j * num_tags;
-          auto slot_logit_idx = t * num_tags + j;
-          M[i][j] = scores[i] + trans_ptr[trans_idx] +
-                    curr_slot_logits_ptr[slot_logit_idx];
-        }
-      }
-      std::vector<int> idxs;
-      for (size_t i = 0; i < num_tags; i++) {
-        T max = 0.0f;
-        int idx = 0;
-        for (size_t j = 0; j < num_tags; j++) {
-          if (M[j][i] > max) {
-            max = M[j][i];
-            idx = j;
-          }
-        }
-        scores[i] = max;
-        idxs.push_back(idx);
-      }
-      paths.push_back(idxs);
-    }
-    int scores_max_index = 0;
-    float scores_max = 0.0f;
-    for (size_t i = 0; i < scores.size(); i++) {
-      if (scores[i] > scores_max) {
-        scores_max = scores[i];
-        scores_max_index = i;
-      }
-    }
-    curr_best_path_ptr[seq_len - 1] = scores_max_index;
-    for (int i = seq_len - 2; i >= 0; i--) {
-      int index = curr_best_path_ptr[i + 1];
-      curr_best_path_ptr[i] = paths[i][index];
-    }
-  }
-}
-
-int main() {
-  // 1. Define a ernie faster tokenizer
-  faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer(
-      "ernie_vocab.txt");
-  std::vector<faster_tokenizer::core::EncodeInput> strings_list = {
-      "导航去科技园二号楼", "屏幕亮度为我减小一点吧"};
-  std::vector<faster_tokenizer::core::Encoding> encodings;
-  tokenizer.EncodeBatchStrings(strings_list, &encodings);
-  size_t batch_size = strings_list.size();
-  size_t seq_len = encodings[0].GetLen();
-  for (auto&& encoding : encodings) {
-    std::cout << encoding.DebugString() << std::endl;
-  }
-  // 2. Initialize runtime
-  fastdeploy::RuntimeOption runtime_option;
-  runtime_option.SetModelPath("nano_static/model.pdmodel",
-                              "nano_static/model.pdiparams");
-  fastdeploy::Runtime runtime;
-  runtime.Init(runtime_option);
-
-  // 3. Construct input vector
-  // 3.1 Convert encodings to input_ids, token_type_ids
-  std::vector<int64_t> input_ids, token_type_ids;
-  for (int i = 0; i < encodings.size(); ++i) {
-    auto&& curr_input_ids = encodings[i].GetIds();
-    auto&& curr_type_ids = encodings[i].GetTypeIds();
-    input_ids.insert(input_ids.end(), curr_input_ids.begin(),
-                     curr_input_ids.end());
-    token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
-                          curr_type_ids.end());
-  }
-  // 3.2 Set data to input vector
-  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
-  void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()};
-  for (int i = 0; i < runtime.NumInputs(); ++i) {
-    inputs[i].SetExternalData({batch_size, seq_len},
-                              fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
-    inputs[i].name = runtime.GetInputInfo(i).name;
-  }
-
-  // 4. Infer
-  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
-  runtime.Infer(inputs, &outputs);
-
-  // 5. Postprocess
-  fastdeploy::FDTensor domain_probs, intent_probs;
-  Softmax<float>(outputs[0], &domain_probs);
-  Softmax<float>(outputs[1], &intent_probs);
-
-  fastdeploy::FDTensor domain_max_probs, intent_max_probs;
-  Max<float>(domain_probs, &domain_max_probs);
-  Max<float>(intent_probs, &intent_max_probs);
-
-  std::vector<float> transition;
-  int num_tags;
-  LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags);
-  fastdeploy::FDTensor trans;
-  trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32,
-                        transition.data());
-
-  fastdeploy::FDTensor best_path;
-  ViterbiDecode<float>(outputs[2], trans, &best_path);
-  // 6. Print result
-  domain_max_probs.PrintInfo();
-  intent_max_probs.PrintInfo();
-
-  batch_size = best_path.shape[0];
-  seq_len = best_path.shape[1];
-  const int64_t* best_path_ptr =
-      reinterpret_cast<const int64_t*>(best_path.Data());
-  for (int i = 0; i < batch_size; ++i) {
-    std::cout << "best_path[" << i << "] = ";
-    for (int j = 0; j < seq_len; ++j) {
-      std::cout << best_path_ptr[i * seq_len + j] << ", ";
-    }
-    std::cout << std::endl;
-  }
-  best_path.PrintInfo();
-  return 0;
-}
diff --git a/examples/vision/biubug6_retinaface.cc b/examples/vision/biubug6_retinaface.cc
deleted file mode 100644
index 65a396ff9..000000000
--- a/examples/vision/biubug6_retinaface.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file =
-      "../resources/models/Pytorch_RetinaFace_resnet50-720-1080.onnx";
-  std::string img_path = "../resources/images/test_face_det.jpg";
-  std::string vis_path =
-      "../resources/outputs/biubug6_retinaface_vis_result.jpg";
-
-  auto model = vis::biubug6::RetinaFace(model_file);
-  model.size = {1080, 720};  // (width, height)
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res, 0.3f, 0.3f)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/deepcam_yolov5face.cc b/examples/vision/deepcam_yolov5face.cc
deleted file mode 100644
index c6e0083e0..000000000
--- a/examples/vision/deepcam_yolov5face.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/yolov5s-face.onnx";
-  std::string img_path = "../resources/images/test_face_det.jpg";
-  std::string vis_path =
-      "../resources/outputs/deepcam_yolov5face_vis_result.jpg";
-
-  auto model = vis::deepcam::YOLOv5Face(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res, 0.1f, 0.3f)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/deepinsight_arcface.cc b/examples/vision/deepinsight_arcface.cc
deleted file mode 100644
index ff5626a32..000000000
--- a/examples/vision/deepinsight_arcface.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  // 0,1 同一个人, 0,2 不同的人
-  std::string model_file = "../resources/models/ms1mv3_arcface_r100.onnx";
-  std::string face0_path = "../resources/images/face_recognition_0.png";
-  std::string face1_path = "../resources/images/face_recognition_1.png";
-  std::string face2_path = "../resources/images/face_recognition_2.png";
-
-  auto model = vis::deepinsight::ArcFace(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-  // 设置输出l2 normalize后的embedding
-  model.l2_normalize = true;
-
-  cv::Mat face0 = cv::imread(face0_path);
-  cv::Mat face1 = cv::imread(face1_path);
-  cv::Mat face2 = cv::imread(face2_path);
-
-  vis::FaceRecognitionResult res0;
-  vis::FaceRecognitionResult res1;
-  vis::FaceRecognitionResult res2;
-  if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
-      (!model.Predict(&face2, &res2))) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测框结果
-  std::cout << "--- [Face 0]:" << res0.Str();
-  std::cout << "--- [Face 1]:" << res1.Str();
-  std::cout << "--- [Face 2]:" << res2.Str();
-
-  // 计算余弦相似度
-  float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding,
-                                                model.l2_normalize);
-  float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding,
-                                                model.l2_normalize);
-  std::cout << "Detect Done! Cosine 01: " << cosine01
-            << ", Cosine 02:" << cosine02 << std::endl;
-  return 0;
-}
diff --git a/examples/vision/deepinsight_cosface.cc b/examples/vision/deepinsight_cosface.cc
deleted file mode 100644
index 7787537ae..000000000
--- a/examples/vision/deepinsight_cosface.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  // 0,1 同一个人, 0,2 不同的人
-  std::string model_file = "../resources/models/glint360k_cosface_r100.onnx";
-  std::string face0_path = "../resources/images/face_recognition_0.png";
-  std::string face1_path = "../resources/images/face_recognition_1.png";
-  std::string face2_path = "../resources/images/face_recognition_2.png";
-
-  auto model = vis::deepinsight::CosFace(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-  // 设置输出l2 normalize后的embedding
-  model.l2_normalize = true;
-
-  cv::Mat face0 = cv::imread(face0_path);
-  cv::Mat face1 = cv::imread(face1_path);
-  cv::Mat face2 = cv::imread(face2_path);
-
-  vis::FaceRecognitionResult res0;
-  vis::FaceRecognitionResult res1;
-  vis::FaceRecognitionResult res2;
-  if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
-      (!model.Predict(&face2, &res2))) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测框结果
-  std::cout << "--- [Face 0]:" << res0.Str();
-  std::cout << "--- [Face 1]:" << res1.Str();
-  std::cout << "--- [Face 2]:" << res2.Str();
-
-  // 计算余弦相似度
-  float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding,
-                                                model.l2_normalize);
-  float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding,
-                                                model.l2_normalize);
-  std::cout << "Detect Done! Cosine 01: " << cosine01
-            << ", Cosine 02:" << cosine02 << std::endl;
-  return 0;
-}
diff --git a/examples/vision/deepinsight_insightface_rec.cc b/examples/vision/deepinsight_insightface_rec.cc
deleted file mode 100644
index b2579bad8..000000000
--- a/examples/vision/deepinsight_insightface_rec.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  // 0,1 同一个人, 0,2 不同的人
-  std::string model_file = "../resources/models/ms1mv3_arcface_r100.onnx";
-  std::string face0_path = "../resources/images/face_recognition_0.png";
-  std::string face1_path = "../resources/images/face_recognition_1.png";
-  std::string face2_path = "../resources/images/face_recognition_2.png";
-
-  auto model = vis::deepinsight::InsightFaceRecognitionModel(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-  // 设置输出l2 normalize后的embedding
-  model.l2_normalize = true;
-
-  cv::Mat face0 = cv::imread(face0_path);
-  cv::Mat face1 = cv::imread(face1_path);
-  cv::Mat face2 = cv::imread(face2_path);
-
-  vis::FaceRecognitionResult res0;
-  vis::FaceRecognitionResult res1;
-  vis::FaceRecognitionResult res2;
-  if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
-      (!model.Predict(&face2, &res2))) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测框结果
-  std::cout << "--- [Face 0]:" << res0.Str();
-  std::cout << "--- [Face 1]:" << res1.Str();
-  std::cout << "--- [Face 2]:" << res2.Str();
-
-  // 计算余弦相似度
-  float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding,
-                                                model.l2_normalize);
-  float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding,
-                                                model.l2_normalize);
-  std::cout << "Detect Done! Cosine 01: " << cosine01
-            << ", Cosine 02:" << cosine02 << std::endl;
-  return 0;
-}
diff --git a/examples/vision/deepinsight_partial_fc.cc b/examples/vision/deepinsight_partial_fc.cc
deleted file mode 100644
index 85ec01799..000000000
--- a/examples/vision/deepinsight_partial_fc.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  // 0,1 同一个人, 0,2 不同的人
-  std::string model_file = "../resources/models/partial_fc_glint360k_r100.onnx";
-  std::string face0_path = "../resources/images/face_recognition_0.png";
-  std::string face1_path = "../resources/images/face_recognition_1.png";
-  std::string face2_path = "../resources/images/face_recognition_2.png";
-
-  auto model = vis::deepinsight::PartialFC(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-  // 设置输出l2 normalize后的embedding
-  model.l2_normalize = true;
-
-  cv::Mat face0 = cv::imread(face0_path);
-  cv::Mat face1 = cv::imread(face1_path);
-  cv::Mat face2 = cv::imread(face2_path);
-
-  vis::FaceRecognitionResult res0;
-  vis::FaceRecognitionResult res1;
-  vis::FaceRecognitionResult res2;
-  if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
-      (!model.Predict(&face2, &res2))) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测框结果
-  std::cout << "--- [Face 0]:" << res0.Str();
-  std::cout << "--- [Face 1]:" << res1.Str();
-  std::cout << "--- [Face 2]:" << res2.Str();
-
-  // 计算余弦相似度
-  float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding,
-                                                model.l2_normalize);
-  float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding,
-                                                model.l2_normalize);
-  std::cout << "Detect Done! Cosine 01: " << cosine01
-            << ", Cosine 02:" << cosine02 << std::endl;
-  return 0;
-}
diff --git a/examples/vision/deepinsight_scrfd.cc b/examples/vision/deepinsight_scrfd.cc
deleted file mode 100644
index 0ff68db93..000000000
--- a/examples/vision/deepinsight_scrfd.cc
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/SCRFD.onnx";
-  std::string img_path = "../resources/images/test_face_det.jpg";
-  std::string vis_path = "../resources/outputs/deepsight_scrfd_vis_result.jpg";
-
-  auto model = vis::deepinsight::SCRFD(model_file);
-  model.size = {640, 640};  // (width, height)
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res, 0.3f, 0.3f)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/deepinsight_vpl.cc b/examples/vision/deepinsight_vpl.cc
deleted file mode 100644
index 8326d3569..000000000
--- a/examples/vision/deepinsight_vpl.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  // 0,1 同一个人, 0,2 不同的人
-  std::string model_file = "../resources/models/ms1mv3_r100_lr01.onnx";
-  std::string face0_path = "../resources/images/face_recognition_0.png";
-  std::string face1_path = "../resources/images/face_recognition_1.png";
-  std::string face2_path = "../resources/images/face_recognition_2.png";
-
-  auto model = vis::deepinsight::VPL(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-  // 设置输出l2 normalize后的embedding
-  model.l2_normalize = true;
-
-  cv::Mat face0 = cv::imread(face0_path);
-  cv::Mat face1 = cv::imread(face1_path);
-  cv::Mat face2 = cv::imread(face2_path);
-
-  vis::FaceRecognitionResult res0;
-  vis::FaceRecognitionResult res1;
-  vis::FaceRecognitionResult res2;
-  if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
-      (!model.Predict(&face2, &res2))) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测框结果
-  std::cout << "--- [Face 0]:" << res0.Str();
-  std::cout << "--- [Face 1]:" << res1.Str();
-  std::cout << "--- [Face 2]:" << res2.Str();
-
-  // 计算余弦相似度
-  float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding,
-                                                model.l2_normalize);
-  float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding,
-                                                model.l2_normalize);
-  std::cout << "Detect Done! Cosine 01: " << cosine01
-            << ", Cosine 02:" << cosine02 << std::endl;
-  return 0;
-}
diff --git a/new_examples/vision/detection/README.md b/examples/vision/detection/README.md
similarity index 100%
rename from new_examples/vision/detection/README.md
rename to examples/vision/detection/README.md
diff --git a/new_examples/vision/detection/yolov7/README.md b/examples/vision/detection/yolov7/README.md
similarity index 100%
rename from new_examples/vision/detection/yolov7/README.md
rename to examples/vision/detection/yolov7/README.md
diff --git a/new_examples/vision/detection/yolov7/cpp/CMakeLists.txt b/examples/vision/detection/yolov7/cpp/CMakeLists.txt
similarity index 100%
rename from new_examples/vision/detection/yolov7/cpp/CMakeLists.txt
rename to examples/vision/detection/yolov7/cpp/CMakeLists.txt
diff --git a/new_examples/vision/detection/yolov7/cpp/README.md b/examples/vision/detection/yolov7/cpp/README.md
similarity index 100%
rename from new_examples/vision/detection/yolov7/cpp/README.md
rename to examples/vision/detection/yolov7/cpp/README.md
diff --git a/new_examples/vision/detection/yolov7/cpp/infer.cc b/examples/vision/detection/yolov7/cpp/infer.cc
similarity index 100%
rename from new_examples/vision/detection/yolov7/cpp/infer.cc
rename to examples/vision/detection/yolov7/cpp/infer.cc
diff --git a/new_examples/vision/detection/yolov7/python/README.md b/examples/vision/detection/yolov7/python/README.md
similarity index 100%
rename from new_examples/vision/detection/yolov7/python/README.md
rename to examples/vision/detection/yolov7/python/README.md
diff --git a/new_examples/vision/detection/yolov7/python/infer.py b/examples/vision/detection/yolov7/python/infer.py
similarity index 100%
rename from new_examples/vision/detection/yolov7/python/infer.py
rename to examples/vision/detection/yolov7/python/infer.py
diff --git a/examples/vision/linzaer_ultraface.cc b/examples/vision/linzaer_ultraface.cc
deleted file mode 100644
index eb1cbafe8..000000000
--- a/examples/vision/linzaer_ultraface.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/version-RFB-320.onnx";
-  std::string img_path = "../resources/images/test_face_det_0.jpg";
-  std::string vis_path =
-      "../resources/outputs/linzaer_ultraface_vis_result.jpg";
-
-  auto model = vis::linzaer::UltraFace(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res, 0.7f, 0.3f)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/megvii_yolox.cc b/examples/vision/megvii_yolox.cc
deleted file mode 100644
index 340694b54..000000000
--- a/examples/vision/megvii_yolox.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/yolox_s.onnx";
-  std::string img_path = "../resources/images/bus.jpg";
-  std::string vis_path  = "../resources/outputs/megvii_yolox_vis_result.jpg";
-
-  auto model = vis::megvii::YOLOX(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl; 
-  }
-  model.EnableDebug();
-  
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl; 
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/meituan_yolov6.cc b/examples/vision/meituan_yolov6.cc
deleted file mode 100644
index 7bdd78e5d..000000000
--- a/examples/vision/meituan_yolov6.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/yolov6s.onnx";
-  std::string img_path = "../resources/images/bus.jpg";
-  std::string vis_path  = "../resources/outputs/meituan_yolov6_vis_result.jpg";
-
-  auto model = vis::meituan::YOLOv6(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl; 
-  }
-  model.EnableDebug();
-  
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl; 
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/ppdet_ppyoloe.cc b/examples/vision/ppdet_ppyoloe.cc
deleted file mode 100644
index b234021c9..000000000
--- a/examples/vision/ppdet_ppyoloe.cc
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "ppyoloe_crn_l_300e_coco/model.pdmodel";
-  std::string params_file = "ppyoloe_crn_l_300e_coco/model.pdiparams";
-  std::string config_file = "ppyoloe_crn_l_300e_coco/infer_cfg.yml";
-  std::string img_path = "test.jpeg";
-  std::string vis_path = "vis.jpeg";
-
-  auto model = vis::ppdet::PPYOLOE(model_file, params_file, config_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/ppogg_yolov5lite.cc b/examples/vision/ppogg_yolov5lite.cc
deleted file mode 100644
index 577543b37..000000000
--- a/examples/vision/ppogg_yolov5lite.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/yolov5lite.onnx";
-  std::string img_path = "../resources/images/test.jpg";
-  std::string vis_path = "../resources/outputs/ppogg_yolov5lite_vis_result.jpg";
-
-  auto model = vis::ppogg::YOLOv5Lite(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/ppseg_unet.cc b/examples/vision/ppseg_unet.cc
deleted file mode 100644
index cb33611ad..000000000
--- a/examples/vision/ppseg_unet.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-#include "yaml-cpp/yaml.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/unet_Cityscapes/model.pdmodel";
-  std::string params_file =
-      "../resources/models/unet_Cityscapes/model.pdiparams";
-  std::string config_file = "../resources/models/unet_Cityscapes/deploy.yaml";
-  std::string img_path = "../resources/images/cityscapes_demo.png";
-  std::string vis_path = "../resources/outputs/vis.jpeg";
-
-  auto model = vis::ppseg::Model(model_file, params_file, config_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im;
-
-  vis::SegmentationResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  YAML::Node cfg = YAML::LoadFile(config_file);
-  int num_classes = 19;
-  if (cfg["Deploy"]["num_classes"]) {
-    num_classes = cfg["Deploy"]["num_classes"].as<int>();
-  }
-
-  // 可视化预测结果
-  vis::Visualize::VisSegmentation(im, res, &vis_im, num_classes);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Inference Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/rangilyu_nanodet_plus.cc b/examples/vision/rangilyu_nanodet_plus.cc
deleted file mode 100644
index 91dcd604e..000000000
--- a/examples/vision/rangilyu_nanodet_plus.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/nanodet-plus-m_320.onnx";
-  std::string img_path = "../resources/images/bus.jpg";
-  std::string vis_path =
-      "../resources/outputs/rangilyu_nanodet_plus_vis_result.jpg";
-
-  auto model = vis::rangilyu::NanoDetPlus(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/ultralytics_yolov5.cc b/examples/vision/ultralytics_yolov5.cc
deleted file mode 100644
index 42a233686..000000000
--- a/examples/vision/ultralytics_yolov5.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/yolov5s.onnx";
-  std::string img_path = "../resources/images/bus.jpg";
-  std::string vis_path  = "../resources/outputs/ultralytics_yolov5_vis_result.jpg";
-
-  auto model = vis::ultralytics::YOLOv5(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl; 
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl; 
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/wongkinyiu_scaledyolov4.cc b/examples/vision/wongkinyiu_scaledyolov4.cc
deleted file mode 100644
index 5374d3453..000000000
--- a/examples/vision/wongkinyiu_scaledyolov4.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/scaledyolov4.onnx";
-  std::string img_path = "../resources/images/bus.jpg";
-  std::string vis_path = "../resources/outputs/wongkinyiu_scaledyolov4_vis_result.jpg";
-
-  auto model = vis::wongkinyiu::ScaledYOLOv4(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/wongkinyiu_yolor.cc b/examples/vision/wongkinyiu_yolor.cc
deleted file mode 100644
index abdca2b7f..000000000
--- a/examples/vision/wongkinyiu_yolor.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/yolor.onnx";
-  std::string img_path = "../resources/images/horses.jpg";
-  std::string vis_path = "../resources/outputs/wongkinyiu_yolor_vis_result.jpg";
-
-  auto model = vis::wongkinyiu::YOLOR(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/wongkinyiu_yolov7.cc b/examples/vision/wongkinyiu_yolov7.cc
deleted file mode 100644
index 7de033cae..000000000
--- a/examples/vision/wongkinyiu_yolov7.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "../resources/models/yolov7.onnx";
-  std::string img_path = "../resources/images/horses.jpg";
-  std::string vis_path  = "../resources/outputs/wongkinyiu_yolov7_vis_result.jpg";
-
-  auto model = vis::wongkinyiu::YOLOv7(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl; 
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl; 
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/examples/vision/zhkkke_modnet.cc b/examples/vision/zhkkke_modnet.cc
deleted file mode 100644
index ee9f8df25..000000000
--- a/examples/vision/zhkkke_modnet.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file =
-      "../resources/models/modnet_photographic_portrait_matting.onnx";
-  std::string img_path = "../resources/images/matting_1.jpg";
-  std::string vis_path = "../resources/outputs/zhkkke_modnet_vis_result.jpg";
-
-  auto model = vis::zhkkke::MODNet(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  // 设置推理size, 必须和模型文件支持的
-  model.size = {256, 256};
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat im_old = im.clone();
-  cv::Mat vis_im = im.clone();
-
-  vis::MattingResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  bool remove_small_connected_area = true;
-  vis::Visualize::VisMattingAlpha(im_old, res, &vis_im,
-                                  remove_small_connected_area);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/model_zoo/.gitignore b/model_zoo/.gitignore
deleted file mode 100644
index e3919c57f..000000000
--- a/model_zoo/.gitignore
+++ /dev/null
@@ -1,12 +0,0 @@
-*.png
-*.jpg
-*.jpeg
-*.onnx
-*.zip
-*.tar
-*.pd*
-*.engine
-*.trt
-*.nb
-*.tgz
-*.gz
diff --git a/model_zoo/text/ernie-3.0/README.md b/model_zoo/text/ernie-3.0/README.md
deleted file mode 100755
index c60148579..000000000
--- a/model_zoo/text/ernie-3.0/README.md
+++ /dev/null
@@ -1,238 +0,0 @@
-# ERNIE 3.0 Python部署指南
-本文介绍 ERNIE 3.0 Python 端的部署，包括部署环境的准备，序列标注和分类两大场景下的使用示例。
-- [ERNIE 3.0 Python 部署指南](#ERNIE3.0Python部署指南)
-  - [1. 环境准备](#1-环境准备)
-    - [1.1 CPU 端](#11-CPU端)
-    - [1.2 GPU 端](#12-GPU端)
-  - [2. 序列标注模型推理](#2-序列标注模型推理)
-    - [2.1 模型获取](#21-模型获取)
-    - [2.2 CPU 端推理样例](#22-CPU端推理样例)
-    - [2.3 GPU 端推理样例](#23-GPU端推理样例)
-  - [3. 分类模型推理](#3-分类模型推理)
-    - [3.1 模型获取](#31-模型获取)
-    - [3.2 CPU 端推理样例](#32-CPU端推理样例)
-    - [3.3 GPU 端推理样例](#33-GPU端推理样例)
-## 1. 环境准备
-ERNIE 3.0 的部署分为 CPU 和 GPU 两种情况，请根据你的部署环境安装对应的依赖。
-### 1.1 CPU端
-CPU 端的部署请使用如下命令安装所需依赖
-```
-pip install -r requirements_cpu.txt
-```
-### 1.2 GPU端
-为了在 GPU 上获得最佳的推理性能和稳定性，请先确保机器已正确安装 NVIDIA 相关驱动和基础软件，确保 CUDA >= 11.2，CuDNN >= 8.2，并使用以下命令安装所需依赖
-```
-pip install -r requirements_gpu.txt
-```
-如需使用半精度（FP16）或量化（INT8）部署，请确保GPU设备的 CUDA 计算能力 (CUDA Compute Capability) 大于 7.0，典型的设备包括 V100、T4、A10、A100、GTX 20 系列和 30 系列显卡等。同时 INT8 推理需要安装 TensorRT 以及包含 TensorRT 预测库的 PaddlePaddle。
-更多关于 CUDA Compute Capability 和精度支持情况请参考 NVIDIA 文档：[GPU硬件与支持精度对照表](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/support-matrix/index.html#hardware-precision-matrix)
-
-1. TensorRT 安装请参考：[TensorRT安装说明](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/install-guide/index.html#overview)，Linux 端简要步骤如下：
-
-    (1)下载 TensorRT8.2 版本，文件名 TensorRT-XXX.tar.gz，[下载链接](https://developer.nvidia.com/tensorrt)
-
-    (2)解压得到 TensorRT-XXX 文件夹
-
-    (3)通过 export LD_LIBRARY_PATH=TensorRT-XXX/lib:$LD_LIBRARY_PATH 将 lib 路径加入到 LD_LIBRARY_PATH 中
-
-    (4)使用 pip install 安装 TensorRT-XXX/python 中对应的 TensorRT 安装包
-
-2. PaddlePaddle 预测库的安装请参考 [PaddlePaddle 预测库安装文档](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/source_compile.html)，Linux 端简要步骤如下：
-
-    (1)根据 CUDA 环境和 Python 版本下载对应的 PaddlePaddle 预测库，注意须下载支持 TensorRT 的预测包，如 linux-cuda11.2-cudnn8.2-trt8-gcc8.2。[PaddlePaddle 预测库下载路径](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python)
-
-    (2)使用 pip install 安装下载好的 PaddlePaddle 预测库
-
-
-## 2. 序列标注模型推理
-### 2.1 模型获取
-用户可使用自己训练的模型进行推理，具体训练调优方法可参考[模型训练调优](./../../README.md#微调)，也可以使用我们提供的 msra_ner 数据集训练的 ERNIE 3.0 模型，请执行如下命令获取模型：
-```
-# 获取序列标注FP32模型
-wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_pruned_infer_model.zip
-unzip msra_ner_pruned_infer_model.zip
-```
-### 2.2 CPU端推理样例
-在 CPU 端，请使用如下命令进行部署
-```
-python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32
-```
-输出打印如下:
-```
-input data: 北京的涮肉，重庆的火锅，成都的小吃都是极具特色的美食。
-The model detects all entities:
-entity: 北京   label: LOC   pos: [0, 1]
-entity: 重庆   label: LOC   pos: [6, 7]
-entity: 成都   label: LOC   pos: [12, 13]
------------------------------
-input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。
-The model detects all entities:
-entity: 乔丹   label: PER   pos: [0, 1]
-entity: 科比   label: PER   pos: [3, 4]
-entity: 詹姆斯   label: PER   pos: [6, 8]
-entity: 姚明   label: PER   pos: [10, 11]
------------------------------
-```
-infer_cpu.py 脚本中的参数说明：
-| 参数 |参数说明 |
-|----------|--------------|
-|--task_name | 配置任务名称，可选 seq_cls 或 token_cls，默认为 seq_cls|
-|--model_name_or_path | 模型的路径或者名字，默认为 ernie-3.0-medium-zh|
-|--model_path | 用于推理的 Paddle 模型的路径|
-|--max_seq_length |最大序列长度，默认为 128|
-|--precision_mode | 推理精度，可选 fp32，fp16 或者 int8，当输入非量化模型并设置 int8 时使用动态量化进行加速，默认 fp32 |
-|--num_threads | 配置 cpu 的线程数，默认为 cpu 的最大线程数 |
-
-**Note**：在支持 avx512_vnni 指令集或 Intel® DL Boost 的 CPU 设备上，可设置 precision_mode 为 int8 对 FP32 模型进行动态量化以获得更高的推理性能，具体性能提升情况请查阅[量化性能提升情况](../../README.md#压缩效果)。
-CPU 端，开启动态量化的命令如下：
-```
-python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode int8
-```
-INT8 的输出打印和 FP32 的输出打印一致。
-
-### 2.3 GPU端推理样例
-在 GPU 端，请使用如下命令进行部署
-```
-python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32
-```
-输出打印如下:
-```
-input data: 北京的涮肉，重庆的火锅，成都的小吃都是极具特色的美食。
-The model detects all entities:
-entity: 北京   label: LOC   pos: [0, 1]
-entity: 重庆   label: LOC   pos: [6, 7]
-entity: 成都   label: LOC   pos: [12, 13]
------------------------------
-input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。
-The model detects all entities:
-entity: 乔丹   label: PER   pos: [0, 1]
-entity: 科比   label: PER   pos: [3, 4]
-entity: 詹姆斯   label: PER   pos: [6, 8]
-entity: 姚明   label: PER   pos: [10, 11]
------------------------------
-```
-如果需要 FP16 进行加速，可以设置 precision_mode 为 fp16，具体命令为
-```
-python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode fp16
-```
-如果需要进行 INT8 量化加速，还需要使用量化脚本对训练好的 FP32 模型进行量化，然后使用量化后的模型进行部署，模型的量化请参考：[模型量化脚本使用说明](./../../README.md#模型压缩)，也可下载我们量化后的 INT8 模型进行部署，请执行如下命令获取模型：
-```
-# 获取序列标注 INT8 量化模型
-wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_quant_infer_model.zip
-unzip msra_ner_quant_infer_model.zip
-```
-量化模型的部署命令为：
-```
-# 第一步，打开 set_dynamic_shape 开关，自动配置动态shape，在当前目录下生成 dynamic_shape_info.txt 文件
-python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape
-# 第二步，读取上一步中生成的 dynamic_shape_info.txt 文件，开启预测
-python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt
-```
-FP16 和 INT8 推理的运行结果和FP32的运行结果一致。
-
-infer_gpu.py 脚本中的参数说明：
-| 参数 |参数说明 |
-|----------|--------------|
-|--task_name | 配置任务名称，可选 seq_cls 或 token_cls，默认为 seq_cls|
-|--model_name_or_path | 模型的路径或者名字，默认为ernie-3.0-medium-zh|
-|--model_path | 用于推理的 Paddle 模型的路径|
-|--batch_size |最大可测的 batch size，默认为 32|
-|--max_seq_length |最大序列长度，默认为 128|
-|--shape_info_file | 指定 dynamic shape info 的存储文件名，默认为 shape_info.txt |
-|--set_dynamic_shape | 配置是否自动配置 TensorRT 的 dynamic shape，在GPU上INT8量化推理时需要先开启此选项进行 dynamic shape 配置，生成 shape_info.txt 后再关闭，默认关闭 |
-|--precision_mode | 推理精度，可选 fp32，fp16 或者 int8，默认 fp32 |
-
-## 3. 分类模型推理
-### 3.1 模型获取
-用户可使用自己训练的模型进行推理，具体训练调优方法可参考[模型训练调优](./../../README.md#微调)，也可以使用我们提供的 tnews 数据集训练的 ERNIE 3.0 模型，请执行如下命令获取模型：
-```
-# 分类模型模型：
-wget  https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_pruned_infer_model.zip
-unzip tnews_pruned_infer_model.zip
-```
-### 3.2 CPU端推理样例
-在 CPU 端，请使用如下命令进行部署
-```
-python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32
-```
-输出打印如下:
-```
-input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗？
-seq cls result:
-label: news_car   confidence: 0.5543532371520996
------------------------------
-input data: 黄磊接受华少快问快答，不光智商逆天，情商也不逊黄渤
-seq cls result:
-label: news_entertainment   confidence: 0.9495906829833984
------------------------------
-```
-和序列标注模型推理类似，使用动态量化进行加速的命令如下：
-```
-python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode int8
-```
-输出打印如下:
-```
-input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗？
-seq cls result:
-label: news_car   confidence: 0.5778735876083374
------------------------------
-input data: 黄磊接受华少快问快答，不光智商逆天，情商也不逊黄渤
-seq cls result:
-label: news_entertainment   confidence: 0.9206441044807434
------------------------------
-```
-### 3.3 GPU端推理样例
-在 GPU 端，请使用如下命令进行部署
-```
-python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32
-```
-输出打印如下:
-```
-input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗？
-seq cls result:
-label: news_car   confidence: 0.5543532371520996
------------------------------
-input data: 黄磊接受华少快问快答，不光智商逆天，情商也不逊黄渤
-seq cls result:
-label: news_entertainment   confidence: 0.9495906829833984
------------------------------
-```
-如果需要 FP16 进行加速，可以设置 precision_mode 为 fp16，具体命令为
-```
-python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode fp16
-```
-输出打印如下:
-```
-input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗？
-seq cls result:
-label: news_car   confidence: 0.5536671876907349
------------------------------
-input data: 黄磊接受华少快问快答，不光智商逆天，情商也不逊黄渤
-seq cls result:
-label: news_entertainment   confidence: 0.9494127035140991
------------------------------
-```
-如果需要进行 INT8 量化加速，还需要使用量化脚本对训练好的 FP32 模型进行量化，然后使用量化后的模型进行部署，模型的量化请参考：[模型量化脚本使用说明](./../../README.md#模型压缩)，也可下载我们量化后的 INT8 模型进行部署，请执行如下命令获取模型：
-```
-# 获取序列标注 INT8 量化模型
-wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_quant_infer_model.zip
-unzip tnews_quant_infer_model.zip
-```
-量化模型的部署命令为：
-```
-# 第一步，打开 set_dynamic_shape 开关，自动配置动态shape，在当前目录下生成 dynamic_shape_info.txt 文件
-python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape
-# 第二步，读取上一步中生成的 dynamic_shape_info.txt 文件，开启预测
-python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt
-```
-输出打印如下:
-```
-input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗？
-seq cls result:
-label: news_car   confidence: 0.5510320067405701
------------------------------
-input data: 黄磊接受华少快问快答，不光智商逆天，情商也不逊黄渤
-seq cls result:
-label: news_entertainment   confidence: 0.9432708024978638
------------------------------
-```
diff --git a/model_zoo/text/ernie-3.0/ernie_predictor.py b/model_zoo/text/ernie-3.0/ernie_predictor.py
deleted file mode 100755
index 61162de97..000000000
--- a/model_zoo/text/ernie-3.0/ernie_predictor.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import six
-import os
-import numpy as np
-# import paddle
-from psutil import cpu_count
-from paddlenlp.transformers import AutoTokenizer
-import fastdeploy
-
-
-def token_cls_print_ret(infer_result, input_data):
-    rets = infer_result["value"]
-    for i, ret in enumerate(rets):
-        print("input data:", input_data[i])
-        print("The model detects all entities:")
-        for iterm in ret:
-            print("entity:", iterm["entity"], "  label:", iterm["label"],
-                  "  pos:", iterm["pos"])
-        print("-----------------------------")
-
-
-def seq_cls_print_ret(infer_result, input_data):
-    label_list = [
-        "news_story", "news_culture", "news_entertainment", "news_sports",
-        "news_finance", "news_house", "news_car", "news_edu", "news_tech",
-        "news_military", "news_travel", "news_world", "news_stock",
-        "news_agriculture", "news_game"
-    ]
-    label = infer_result["label"].squeeze().tolist()
-    confidence = infer_result["confidence"].squeeze().tolist()
-    for i, ret in enumerate(infer_result):
-        print("input data:", input_data[i])
-        print("seq cls result:")
-        print("label:", label_list[label[i]], "  confidence:", confidence[i])
-        print("-----------------------------")
-
-
-class ErniePredictor(object):
-    def __init__(self, args):
-        if not isinstance(args.device, six.string_types):
-            print(
-                ">>> [InferBackend] The type of device must be string, but the type you set is: ",
-                type(device))
-            exit(0)
-        args.device = args.device.lower()
-        if args.device not in ['cpu', 'gpu', 'xpu']:
-            print(
-                ">>> [InferBackend] The device must be cpu or gpu, but your device is set to:",
-                type(args.device))
-            exit(0)
-
-        self.task_name = args.task_name
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            args.model_name_or_path, use_faster=True)
-        if args.task_name == 'seq_cls':
-            self.label_names = []
-            self.preprocess = self.seq_cls_preprocess
-            self.postprocess = self.seq_cls_postprocess
-            self.printer = seq_cls_print_ret
-        elif args.task_name == 'token_cls':
-            self.label_names = [
-                'O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC'
-            ]
-            self.preprocess = self.token_cls_preprocess
-            self.postprocess = self.token_cls_postprocess
-            self.printer = token_cls_print_ret
-        else:
-            print(
-                "[ErniePredictor]: task_name only support seq_cls and token_cls now."
-            )
-            exit(0)
-
-        self.max_seq_length = args.max_seq_length
-
-        if args.device == 'cpu':
-            args.set_dynamic_shape = False
-            args.shape_info_file = None
-            args.batch_size = 32
-        if args.device == 'gpu':
-            args.num_threads = cpu_count(logical=False)
-        # Set the runtime option
-        runtime_option = fastdeploy.RuntimeOption()
-        runtime_option.set_model_path(args.model_path + ".pdmodel",
-                                      args.model_path + ".pdiparams")
-        precision_mode = args.precision_mode.lower()
-        use_fp16 = precision_mode == "fp16"
-        # runtime_option.use_paddle_backend()
-        if args.device == 'cpu':
-            runtime_option.use_cpu()
-            runtime_option.set_cpu_thread_num(args.num_threads)
-            if use_fp16:
-                runtime_option.enable_paddle_mkldnn()
-        elif args.device == 'gpu':
-            runtime_option.use_gpu()
-            if use_fp16:
-                runtime_option.use_trt_backend()
-                runtime_option.enable_trt_fp16()
-
-        self.inference_backend = fastdeploy.Runtime(runtime_option._option)
-        if args.set_dynamic_shape:
-            # If set_dynamic_shape is turned on, all required dynamic shapes will be
-            # automatically set according to the batch_size and max_seq_length.
-            self.set_dynamic_shape(args.max_seq_length, args.batch_size)
-            exit(0)
-
-    def seq_cls_preprocess(self, input_data: list):
-        data = input_data
-        # tokenizer + pad
-        data = self.tokenizer(
-            data,
-            max_length=self.max_seq_length,
-            padding=True,
-            truncation=True)
-        input_ids = data["input_ids"]
-        token_type_ids = data["token_type_ids"]
-        return {
-            "input_ids": np.array(
-                input_ids, dtype="int64"),
-            "token_type_ids": np.array(
-                token_type_ids, dtype="int64")
-        }
-
-    def seq_cls_postprocess(self, infer_data, input_data):
-        logits = np.array(infer_data[0])
-        max_value = np.max(logits, axis=1, keepdims=True)
-        exp_data = np.exp(logits - max_value)
-        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
-        out_dict = {
-            "label": probs.argmax(axis=-1),
-            "confidence": probs.max(axis=-1)
-        }
-        return out_dict
-
-    def token_cls_preprocess(self, data: list):
-        # tokenizer + pad
-        is_split_into_words = False
-        if isinstance(data[0], list):
-            is_split_into_words = True
-        data = self.tokenizer(
-            data,
-            max_length=self.max_seq_length,
-            padding=True,
-            truncation=True,
-            is_split_into_words=is_split_into_words)
-
-        input_ids = data["input_ids"]
-        token_type_ids = data["token_type_ids"]
-        return {
-            "input_ids": np.array(
-                input_ids, dtype="int64"),
-            "token_type_ids": np.array(
-                token_type_ids, dtype="int64")
-        }
-
-    def token_cls_postprocess(self, infer_data, input_data):
-        result = np.array(infer_data[0])
-        tokens_label = result.argmax(axis=-1).tolist()
-        # 获取batch中每个token的实体
-        value = []
-        for batch, token_label in enumerate(tokens_label):
-            start = -1
-            label_name = ""
-            items = []
-            for i, label in enumerate(token_label):
-                if (self.label_names[label] == "O" or
-                        "B-" in self.label_names[label]) and start >= 0:
-                    entity = input_data[batch][start:i - 1]
-                    if isinstance(entity, list):
-                        entity = "".join(entity)
-                    items.append({
-                        "pos": [start, i - 2],
-                        "entity": entity,
-                        "label": label_name,
-                    })
-                    start = -1
-                if "B-" in self.label_names[label]:
-                    start = i - 1
-                    label_name = self.label_names[label][2:]
-            if start >= 0:
-                items.append({
-                    "pos": [start, len(token_label) - 1],
-                    "entity": input_data[batch][start:len(token_label) - 1],
-                    "label": ""
-                })
-            value.append(items)
-
-        out_dict = {"value": value, "tokens_label": tokens_label}
-        return out_dict
-
-    def set_dynamic_shape(self, max_seq_length, batch_size):
-        # The dynamic shape info required by TRT is automatically generated
-        # according to max_seq_length and batch_size and stored in shape_info.txt
-        min_batch_size, max_batch_size, opt_batch_size = 1, batch_size, batch_size
-        min_seq_len, max_seq_len, opt_seq_len = 2, max_seq_length, max_seq_length
-        batches = [
-            {
-                "input_ids": np.zeros(
-                    [min_batch_size, min_seq_len], dtype="int64"),
-                "token_type_ids": np.zeros(
-                    [min_batch_size, min_seq_len], dtype="int64")
-            },
-            {
-                "input_ids": np.zeros(
-                    [max_batch_size, max_seq_len], dtype="int64"),
-                "token_type_ids": np.zeros(
-                    [max_batch_size, max_seq_len], dtype="int64")
-            },
-            {
-                "input_ids": np.zeros(
-                    [opt_batch_size, opt_seq_len], dtype="int64"),
-                "token_type_ids": np.zeros(
-                    [opt_batch_size, opt_seq_len], dtype="int64")
-            },
-        ]
-        for batch in batches:
-            self.inference_backend.infer(batch)
-        print(
-            "[InferBackend] Set dynamic shape finished, please close set_dynamic_shape and restart."
-        )
-
-    def infer(self, data):
-        return self.inference_backend.infer(data)
-
-    def predict(self, input_data: list):
-        preprocess_result = self.preprocess(input_data)
-        infer_result = self.infer(preprocess_result)
-        result = self.postprocess(infer_result, input_data)
-        self.printer(result, input_data)
-        return result
diff --git a/model_zoo/text/ernie-3.0/infer_cpu.py b/model_zoo/text/ernie-3.0/infer_cpu.py
deleted file mode 100755
index 3ab8121a5..000000000
--- a/model_zoo/text/ernie-3.0/infer_cpu.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-import argparse
-from psutil import cpu_count
-from ernie_predictor import ErniePredictor
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    # Required parameters
-    parser.add_argument(
-        "--task_name",
-        default='seq_cls',
-        type=str,
-        help="The name of the task to perform predict, selected in: seq_cls and token_cls"
-    )
-    parser.add_argument(
-        "--model_name_or_path",
-        default="ernie-3.0-medium-zh",
-        type=str,
-        help="The directory or name of model.", )
-    parser.add_argument(
-        "--model_path",
-        type=str,
-        required=True,
-        help="The path prefix of inference model to be used.", )
-    parser.add_argument(
-        "--max_seq_length",
-        default=128,
-        type=int,
-        help="The maximum total input sequence length after tokenization. Sequences longer "
-        "than this will be truncated, sequences shorter will be padded.", )
-    parser.add_argument(
-        "--precision_mode",
-        type=str,
-        default="fp32",
-        choices=["fp32", "int8"],
-        help="Inference precision, set int8 to use dynamic quantization for acceleration.",
-    )
-    parser.add_argument(
-        "--num_threads",
-        default=cpu_count(logical=False),
-        type=int,
-        help="num_threads for cpu.", )
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    args = parse_args()
-
-    args.task_name = args.task_name.lower()
-    args.device = 'cpu'
-    predictor = ErniePredictor(args)
-
-    if args.task_name == 'seq_cls':
-        text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗？", "黄磊接受华少快问快答，不光智商逆天，情商也不逊黄渤"]
-    elif args.task_name == 'token_cls':
-        text = ["北京的涮肉，重庆的火锅，成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"]
-
-    outputs = predictor.predict(text)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/model_zoo/text/ernie-3.0/infer_gpu.py b/model_zoo/text/ernie-3.0/infer_gpu.py
deleted file mode 100755
index 4175a2929..000000000
--- a/model_zoo/text/ernie-3.0/infer_gpu.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-import argparse
-from ernie_predictor import ErniePredictor
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    # Required parameters
-    parser.add_argument(
-        "--task_name",
-        default='seq_cls',
-        type=str,
-        help="The name of the task to perform predict, selected in: seq_cls and token_cls"
-    )
-    parser.add_argument(
-        "--model_name_or_path",
-        default="ernie-3.0-medium-zh",
-        type=str,
-        help="The directory or name of model.", )
-    parser.add_argument(
-        "--model_path",
-        type=str,
-        required=True,
-        help="The path prefix of inference model to be used.", )
-    parser.add_argument(
-        "--batch_size",
-        default=32,
-        type=int,
-        help="Batch size for predict.", )
-    parser.add_argument(
-        "--max_seq_length",
-        default=128,
-        type=int,
-        help="The maximum total input sequence length after tokenization. Sequences longer "
-        "than this will be truncated, sequences shorter will be padded.", )
-    parser.add_argument(
-        "--set_dynamic_shape",
-        action='store_true',
-        help="Whether to automatically set dynamic shape.", )
-    parser.add_argument(
-        "--shape_info_file",
-        default="shape_info.txt",
-        type=str,
-        help="The collected dynamic shape info file.", )
-    parser.add_argument(
-        "--precision_mode",
-        type=str,
-        default="fp32",
-        choices=["fp32", "fp16", "int8"],
-        help="Inference precision.", )
-    args = parser.parse_args()
-    return args
-
-
-def main():
-    args = parse_args()
-    args.task_name = args.task_name.lower()
-    args.device = 'gpu'
-    predictor = ErniePredictor(args)
-
-    if args.task_name == 'seq_cls':
-        text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗？", "黄磊接受华少快问快答，不光智商逆天，情商也不逊黄渤"]
-    elif args.task_name == 'token_cls':
-        text = ["北京的涮肉，重庆的火锅，成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"]
-
-    outputs = predictor.predict(text)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/model_zoo/text/ernie-3.0/requirements_cpu.txt b/model_zoo/text/ernie-3.0/requirements_cpu.txt
deleted file mode 100755
index 9725b9194..000000000
--- a/model_zoo/text/ernie-3.0/requirements_cpu.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-onnxruntime
-psutil
-paddlenlp
diff --git a/model_zoo/text/ernie-3.0/requirements_gpu.txt b/model_zoo/text/ernie-3.0/requirements_gpu.txt
deleted file mode 100755
index bd5e113bf..000000000
--- a/model_zoo/text/ernie-3.0/requirements_gpu.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-onnxruntime-gpu
-onnxconverter-common
-psutil
-paddlenlp
diff --git a/model_zoo/vision/arcface/README.md b/model_zoo/vision/arcface/README.md
deleted file mode 100644
index 478b695c1..000000000
--- a/model_zoo/vision/arcface/README.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# ArcFace部署示例
-
-## 0. 简介
-当前支持模型版本为：[ArcFace CommitID:babb9a5](https://github.com/deepinsight/insightface/commit/babb9a5)
-
-本文档说明如何进行[ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch) 的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp                     # C++ 代码目录
-│   ├── CMakeLists.txt      # C++ 代码编译CMakeLists文件
-│   ├── README.md           # C++ 代码编译部署文档
-│   └── arcface.cc          # C++ 示例代码
-├── api.md                  # API 说明文档
-├── README.md               # ArcFace 部署文档
-└── arcface.py              # Python示例代码
-```
-
-## 1. 特别说明  
-fastdeploy支持 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 的人脸识别模块recognition中大部分模型的部署，包括ArcFace、CosFace、Partial FC、VPL等，由于用法类似，这里仅用ArcFace来演示部署流程。所有支持的模型结构，可参考 [ArcFace API文档](./api.md).
-
-
-## 2. 获取ONNX文件
-
-访问[ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch)官方github库，按照指引下载安装，下载pt模型文件，利用 `torch2onnx.py` 得到`onnx`格式文件。
-
-* 下载ArcFace模型文件
-  ```
-  Link: https://pan.baidu.com/share/init?surl=CL-l4zWqsI1oDuEEYVhj-g code: e8pw  
-  ```
-
-* 导出onnx格式文件
-  ```bash
-  PYTHONPATH=. python ./torch2onnx.py ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1
-  ```
-* 移动onnx文件到model_zoo/arcface的目录
-  ```bash
-  cp PATH/TO/ms1mv3_arcface_r100.onnx PATH/TO/model_zoo/vision/arcface/
-  ```
-
-
-## 3. 准备测试图片
-准备3张仅包含人脸的测试图片，命名为face_recognition_*.jpg，并拷贝到可执行文件所在的目录，比如
-```bash
-face_recognition_0.png  # 0,1 同一个人
-face_recognition_1.png
-face_recognition_2.png  # 0,2 不同的人
-```
-
-## 4. 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```bash
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## 5. Python部署
-
-执行如下代码即会自动下载ArcFace模型和测试图片
-```bash
-python arcface.py
-```
-
-执行完成后会输出检测结果如下
-```
-FaceRecognitionResult: [Dim(512), Min(-0.141219), Max(0.121645), Mean(-0.003172)]
-FaceRecognitionResult: [Dim(512), Min(-0.117939), Max(0.141897), Mean(0.000407)]
-FaceRecognitionResult: [Dim(512), Min(-0.124471), Max(0.112567), Mean(-0.001320)]
-Cosine 01:  0.7211584683376316
-Cosine 02:  -0.06262668682788906
-```
-
-## 6. 其它文档
-
-- [C++部署](./cpp/README.md)
-- [ArcFace API文档](./api.md)
diff --git a/model_zoo/vision/arcface/api.md b/model_zoo/vision/arcface/api.md
deleted file mode 100644
index b509d5cb2..000000000
--- a/model_zoo/vision/arcface/api.md
+++ /dev/null
@@ -1,113 +0,0 @@
-# ArcFace API说明
-
-## 0. 特别说明  
-fastdeploy支持 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 的人脸识别模块recognition中大部分模型的部署，包括ArcFace、CosFace、Partial FC、VPL等，由于用法类似，这里仅用ArcFace来说明参数设置。
-
-## 1. Python API
-
-### 1.1 ArcFace 类
-
-#### 1.1.1 类初始化说明
-```python
-fastdeploy.vision.deepinsight.ArcFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-ArcFace模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`xxx.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### 1.1.2 predict函数
-> ```python
-> ArcFace.predict(image_data)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-
-示例代码参考[arcface.py](./arcface.py)
-
-### 1.2 其他支持的类
-```python
-fastdeploy.vision.deepinsight.ArcFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-fastdeploy.vision.deepinsight.CosFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-fastdeploy.vision.deepinsight.PartialFC(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-fastdeploy.vision.deepinsight.VPL(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-fastdeploy.vision.deepinsight.InsightFaceRecognitionModel(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-Tips: 如果 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 人脸识别的推理逻辑没有随它自身的版本发生太大变化，则可以都统一使用 InsightFaceRecognitionModel 进行推理。
-
-
-
-## 2. C++ API
-
-### 2.1 ArcFace 类
-#### 2.1.1 类初始化说明
-```C++
-fastdeploy::vision::deepinsight::ArcFace(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-ArcFace模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`xxx.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### 2.1.2 Predict函数
-> ```C++
-> ArcFace::Predict(cv::Mat* im, FaceRecognitionResult* result)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，result的成员embedding包含人脸向量
-
-示例代码参考[cpp/arcface.cc](cpp/arcface.cc)
-
-### 2.2 其他支持的类
-```C++
-fastdeploy::vision::deepinsight::ArcFace(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);
-fastdeploy::vision::deepinsight::CosFace(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);
-fastdeploy::vision::deepinsight::PartialFC(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);
-fastdeploy::vision::deepinsight::VPL(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);
-fastdeploy::vision::deepinsight::InsightFaceRecognitionModel(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX);  
-```
-Tips: 如果 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 人脸识别的推理逻辑没有随它自身的版本发生太大变化，则可以都统一使用 InsightFaceRecognitionModel 进行推理。
-
-
-## 3. 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/arcface/arcface.py b/model_zoo/vision/arcface/arcface.py
deleted file mode 100644
index 8c5fdfdf1..000000000
--- a/model_zoo/vision/arcface/arcface.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import fastdeploy as fd
-import numpy as np
-import cv2
-
-
-# 余弦相似度
-def cosine_similarity(a, b):
-    a = np.array(a)
-    b = np.array(b)
-    mul_a = np.linalg.norm(a, ord=2)
-    mul_b = np.linalg.norm(b, ord=2)
-    mul_ab = np.dot(a, b)
-    return mul_ab / (np.sqrt(mul_a) * np.sqrt(mul_b))
-
-
-# 加载模型
-model = fd.vision.deepinsight.ArcFace("ms1mv3_arcface_r100.onnx")
-print("Initialed model!")
-
-# 加载图片
-face0 = cv2.imread("face_recognition_0.png")  # 0,1 同一个人
-face1 = cv2.imread("face_recognition_1.png")
-face2 = cv2.imread("face_recognition_2.png")  # 0,2 不同的人
-
-# 设置 l2 normalize
-model.l2_normalize = True
-
-result0 = model.predict(face0)
-result1 = model.predict(face1)
-result2 = model.predict(face2)
-
-# 计算余弦相似度
-embedding0 = result0.embedding
-embedding1 = result1.embedding
-embedding2 = result2.embedding
-
-cosine01 = cosine_similarity(embedding0, embedding1)
-cosine02 = cosine_similarity(embedding0, embedding2)
-
-# 打印结果
-print(result0, end="")
-print(result1, end="")
-print(result2, end="")
-print("Cosine 01: ", cosine01)
-print("Cosine 02: ", cosine02)
-print(model.runtime_option)
diff --git a/model_zoo/vision/arcface/cpp/CMakeLists.txt b/model_zoo/vision/arcface/cpp/CMakeLists.txt
deleted file mode 100644
index 387324f16..000000000
--- a/model_zoo/vision/arcface/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(arcface_demo C CXX)
-CMAKE_MINIMUM_REQUIRED(VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(arcface_demo ${PROJECT_SOURCE_DIR}/arcface.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(arcface_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/arcface/cpp/README.md b/model_zoo/vision/arcface/cpp/README.md
deleted file mode 100644
index bb7145d32..000000000
--- a/model_zoo/vision/arcface/cpp/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# 编译ArcFace示例
-
-## 0. 简介
-当前支持模型版本为：[ArcFace CommitID:babb9a5](https://github.com/deepinsight/insightface/commit/babb9a5)
-
-## 1. 下载和解压预测库
-```bash
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.3.0.tgz
-tar xvf fastdeploy-linux-x64-0.3.0.tgz
-```
-
-## 1. 编译示例代码
-```bash
-mkdir build & cd build
-cmake ..
-make -j
-```
-
-## 3. 特别说明  
-fastdeploy支持 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 的人脸识别模块recognition中大部分模型的部署，包括ArcFace、CosFace、Partial FC、VPL等，由于用法类似，这里仅用ArcFace来演示部署流程。所有支持的模型结构，可参考 [ArcFace API文档](../api.md).
-
-## 4. 获取ONNX文件
-
-访问[ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch)官方github库，按照指引下载安装，下载pt模型文件，利用 `torch2onnx.py` 得到`onnx`格式文件。
-
-* 下载ArcFace模型文件
-  ```
-  Link: https://pan.baidu.com/share/init?surl=CL-l4zWqsI1oDuEEYVhj-g code: e8pw  
-  ```
-
-* 导出onnx格式文件
-  ```bash
-  PYTHONPATH=. python ./torch2onnx.py ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1
-  ```
-* 移动onnx文件到model_zoo/arcface的目录
-  ```bash
-  cp PATH/TO/ms1mv3_arcface_r100.onnx PATH/TO/model_zoo/vision/arcface/
-  ```
-
-
-## 5. 准备测试图片
-准备3张仅包含人脸的测试图片，命名为face_recognition_*.jpg，并拷贝到可执行文件所在的目录，比如
-```bash
-face_recognition_0.png  # 0,1 同一个人
-face_recognition_1.png
-face_recognition_2.png  # 0,2 不同的人
-```
-
-## 6. 执行
-```bash
-./arcface_demo
-```
-
-执行完成后会输出检测结果如下
-```
-FaceRecognitionResult: [Dim(512), Min(-0.141219), Max(0.121645), Mean(-0.003172)]
-FaceRecognitionResult: [Dim(512), Min(-0.117939), Max(0.141897), Mean(0.000407)]
-FaceRecognitionResult: [Dim(512), Min(-0.124471), Max(0.112567), Mean(-0.001320)]
-Cosine 01:  0.7211584683376316
-Cosine 02:  -0.06262668682788906
-```
diff --git a/model_zoo/vision/arcface/cpp/arcface.cc b/model_zoo/vision/arcface/cpp/arcface.cc
deleted file mode 100644
index 723bf9398..000000000
--- a/model_zoo/vision/arcface/cpp/arcface.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-#include "fastdeploy/vision/utils/utils.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  // 0,1 同一个人, 0,2 不同的人
-  std::string model_file = "./ms1mv3_arcface_r100.onnx";
-  std::string face0_path = "./face_recognition_0.png";
-  std::string face1_path = "./face_recognition_1.png";
-  std::string face2_path = "./face_recognition_2.png";
-
-  auto model = vis::deepinsight::ArcFace(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-  // 设置输出l2 normalize后的embedding
-  model.l2_normalize = true;
-
-  cv::Mat face0 = cv::imread(face0_path);
-  cv::Mat face1 = cv::imread(face1_path);
-  cv::Mat face2 = cv::imread(face2_path);
-
-  vis::FaceRecognitionResult res0;
-  vis::FaceRecognitionResult res1;
-  vis::FaceRecognitionResult res2;
-  if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
-      (!model.Predict(&face2, &res2))) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测框结果
-  std::cout << "--- [Face 0]:" << res0.Str();
-  std::cout << "--- [Face 1]:" << res1.Str();
-  std::cout << "--- [Face 2]:" << res2.Str();
-
-  // 计算余弦相似度
-  float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding,
-                                                model.l2_normalize);
-  float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding,
-                                                model.l2_normalize);
-  std::cout << "Detect Done! Cosine 01: " << cosine01
-            << ", Cosine 02:" << cosine02 << std::endl;
-  return 0;
-}
diff --git a/model_zoo/vision/modnet/README.md b/model_zoo/vision/modnet/README.md
deleted file mode 100644
index 90b3fafdc..000000000
--- a/model_zoo/vision/modnet/README.md
+++ /dev/null
@@ -1,67 +0,0 @@
-# MODNet 部署示例
-
-## 0. 简介
-当前支持模型版本为：[MODNet CommitID:28165a4](https://github.com/ZHKKKe/MODNet/commit/28165a4)
-
-本文档说明如何进行[MODNet](https://github.com/ZHKKKe/MODNet) 的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp                     # C++ 代码目录
-│   ├── CMakeLists.txt      # C++ 代码编译CMakeLists文件
-│   ├── README.md           # C++ 代码编译部署文档
-│   └── modnet.cc           # C++ 示例代码
-├── api.md                  # API 说明文档
-├── README.md               # MODNet 部署文档
-└── modnet.py               # Python示例代码
-```
-
-## 1. 获取ONNX文件
-
-访问[MODNet](https://github.com/ZHKKKe/MODNet)官方github库，按照指引下载安装，下载模型文件，利用 `onnx/export_onnx.py` 得到`onnx`格式文件。
-
-* 导出onnx格式文件
-  ```bash
-  python -m onnx.export_onnx \
-    --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
-    --output-path=pretrained/modnet_photographic_portrait_matting.onnx
-  ```
-* 移动onnx文件到model_zoo/modnet的目录
-  ```bash
-  cp PATH/TO/modnet_photographic_portrait_matting.onnx PATH/TO/model_zoo/vision/modnet/
-  ```
-
-
-## 2. 准备测试图片
-准备1张仅包含人像的测试图片，命名为matting_1.jpg，并拷贝到可执行文件所在的目录，比如
-```bash
-matting_1.jpg
-```
-
-## 3. 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```bash
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## 4. Python部署
-
-执行如下代码即会自动下载MODNet模型和测试图片
-```bash
-python modnet.py
-```
-
-执行完成后会输出检测结果如下, 可视化结果保存在`vis_result.jpg`中
-```
-MattingResult[Foreground(false), Alpha(Numel(65536), Shape(256,256), Min(0.000000), Max(1.000000), Mean(0.464415))]
-```
-
-## 5. 其它文档
-
-- [C++部署](./cpp/README.md)
-- [MODNet API文档](./api.md)
diff --git a/model_zoo/vision/modnet/api.md b/model_zoo/vision/modnet/api.md
deleted file mode 100644
index 164ef099e..000000000
--- a/model_zoo/vision/modnet/api.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# MODNet API说明
-
-## 1. Python API
-
-### 1.1 MODNet 类
-
-#### 1.1.1 类初始化说明
-```python
-fastdeploy.vision.zhkkke.MODNet(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-MODNet模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`xxx.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### 1.1.2 predict函数
-> ```python
-> MODNet.predict(image_data)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-
-示例代码参考[modnet.py](./modnet.py)
-
-
-## 2. C++ API
-
-### 2.1 MODNet 类
-#### 2.1.1 类初始化说明
-```C++
-fastdeploy::vision::zhkkke::MODNet(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-MODNet模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`xxx.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### 2.1.2 Predict函数
-> ```C++
-> MODNet::Predict(cv::Mat* im, MattingResult* result)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包含的成员如下
-> >     * alpha: std::vector\<float\> 包含透明度
-> >     * contain_foreground: bool 表示输出是否包含预测的前景
-> >     * foreground: std::vector\<float\> 如果模型包含前景预测，则此项为预测的前景
-> >     * shape: std::vector\<int\> 包含输出alpha的维度(h,w), 如果包含前景，则shape为(h,w,c) c表示前景的通道数，一般为c=3
-
-示例代码参考[cpp/modnet.cc](cpp/modnet.cc)
-
-## 3. 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/modnet/cpp/CMakeLists.txt b/model_zoo/vision/modnet/cpp/CMakeLists.txt
deleted file mode 100644
index 9e500debe..000000000
--- a/model_zoo/vision/modnet/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(modnet_demo C CXX)
-CMAKE_MINIMUM_REQUIRED(VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(modnet_demo ${PROJECT_SOURCE_DIR}/modnet.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(modnet_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/modnet/cpp/README.md b/model_zoo/vision/modnet/cpp/README.md
deleted file mode 100644
index f2b2e4949..000000000
--- a/model_zoo/vision/modnet/cpp/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# 编译ArcFace示例
-
-## 0. 简介
-当前支持模型版本为：[MODNet CommitID:28165a4](https://github.com/ZHKKKe/MODNet/commit/28165a4)
-
-## 1. 下载和解压预测库
-```bash
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.3.0.tgz
-tar xvf fastdeploy-linux-x64-0.3.0.tgz
-```
-
-## 2. 编译示例代码
-```bash
-mkdir build & cd build
-cmake ..
-make -j
-```
-
-## 3. 获取ONNX文件
-
-访问[MODNet](https://github.com/ZHKKKe/MODNet)官方github库，按照指引下载安装，下载模型文件，利用 `onnx/export_onnx.py` 得到`onnx`格式文件。
-
-* 导出onnx格式文件
-  ```bash
-  python -m onnx.export_onnx \
-    --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
-    --output-path=pretrained/modnet_photographic_portrait_matting.onnx
-  ```
-* 移动onnx文件到model_zoo/modnet的目录
-  ```bash
-  cp PATH/TO/modnet_photographic_portrait_matting.onnx PATH/TO/model_zoo/vision/modnet/
-  ```
-
-
-## 4. 准备测试图片
-准备1张仅包含人像的测试图片，命名为matting_1.jpg，并拷贝到可执行文件所在的目录，比如
-```bash
-matting_1.jpg
-```
-
-## 5. 执行
-```bash
-./modnet_demo
-```
-
-执行完成后会输出检测结果如下, 可视化结果保存在`vis_result.jpg`中
-```
-MattingResult[Foreground(false), Alpha(Numel(65536), Shape(256,256), Min(0.000000), Max(1.000000), Mean(0.464415))]
-```
diff --git a/model_zoo/vision/modnet/cpp/modnet.cc b/model_zoo/vision/modnet/cpp/modnet.cc
deleted file mode 100644
index b89b2d1dc..000000000
--- a/model_zoo/vision/modnet/cpp/modnet.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  std::string model_file = "./modnet_photographic_portrait_matting.onnx";
-  std::string img_path = "./matting_1.jpg";
-  std::string vis_path = "./vis_result.jpg";
-
-  auto model = vis::zhkkke::MODNet(model_file);
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  // 设置推理size, 必须和模型文件一致
-  model.size = {256, 256};
-
-  cv::Mat im = cv::imread(img_path);
-  cv::Mat im_old = im.clone();
-  cv::Mat vis_im = im.clone();
-
-  vis::MattingResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-  std::cout << "Prediction Done!" << std::endl;
-
-  // 输出预测结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  bool remove_small_connected_area = true;
-  vis::Visualize::VisMattingAlpha(im_old, res, &vis_im,
-                                  remove_small_connected_area);
-  cv::imwrite(vis_path, vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/model_zoo/vision/modnet/modnet.py b/model_zoo/vision/modnet/modnet.py
deleted file mode 100644
index 538a667bd..000000000
--- a/model_zoo/vision/modnet/modnet.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 加载模型
-model = fd.vision.zhkkke.MODNet("modnet_photographic_portrait_matting.onnx")
-
-# 设置模型输入大小
-model.size = (256, 256)
-
-# 预测图片
-im = cv2.imread("./matting_1.jpg")
-im_old = im.copy()
-vis_im = im.copy()
-
-result = model.predict(im)
-# 可视化结果
-fd.vision.visualize.vis_matting_alpha(im_old, result, vis_im, False)
-cv2.imwrite("vis_result.jpg", vis_im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/nanodet_plus/README.md b/model_zoo/vision/nanodet_plus/README.md
deleted file mode 100644
index 164f7691f..000000000
--- a/model_zoo/vision/nanodet_plus/README.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# NanoDetPlus部署示例
-
-当前支持模型版本为：[NanoDetPlus v1.0.0-alpha-1](https://github.com/RangiLyu/nanodet/releases/tag/v1.0.0-alpha-1)
-
-本文档说明如何进行[NanoDetPlus](https://github.com/RangiLyu/nanodet)的快速部署推理。本目录结构如下
-```
-.
-├── cpp                  # C++ 代码目录
-│   ├── CMakeLists.txt   # C++ 代码编译CMakeLists文件
-│   ├── README.md        # C++ 代码编译部署文档
-│   └── nanodet_plus.cc  # C++ 示例代码
-├── README.md            # YOLOX 部署文档
-└── nanodet_plus.py      # Python示例代码
-```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## Python部署
-
-执行如下代码即会自动下载NanoDetPlus模型和测试图片
-```
-python nanodet_plus.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-5.710144,220.634033, 807.854370, 724.089111, 0.825635, 5
-45.646439,393.694061, 229.267044, 903.998413, 0.818263, 0
-218.289322,402.268829, 342.083252, 861.766479, 0.709301, 0
-698.587036,325.627197, 809.000000, 876.990967, 0.630235, 0
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [NanoDetPlus API文档](./api.md)
diff --git a/model_zoo/vision/nanodet_plus/api.md b/model_zoo/vision/nanodet_plus/api.md
deleted file mode 100644
index b428e39df..000000000
--- a/model_zoo/vision/nanodet_plus/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# NanoDetPlus API说明
-
-## Python API
-
-### NanoDetPlus类
-```
-fastdeploy.vision.rangilyu.NanoDetPlus(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-NanoDetPlus模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`nanodet-plus-m_320.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> NanoDetPlus.predict(image_data, conf_threshold=0.35, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[nanodet_plus.py](./nanodet_plus.py)
-
-
-## C++ API
-
-### NanoDetPlus类
-```
-fastdeploy::vision::rangilyu::NanoDetPlus(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-NanoDetPlus模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`nanodet-plus-m_320.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> NanoDetPlus::Predict(cv::Mat* im, DetectionResult* result,
->                      float conf_threshold = 0.35,
->                      float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/nanodet_plus.cc](cpp/nanodet_plus.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt b/model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt
deleted file mode 100644
index 7a78ef9e4..000000000
--- a/model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(nanodet_plus_demo C CXX)
-CMAKE_MINIMUM_REQUIRED(VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(nanodet_plus_demo ${PROJECT_SOURCE_DIR}/nanodet_plus.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(nanodet_plus_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/nanodet_plus/cpp/README.md b/model_zoo/vision/nanodet_plus/cpp/README.md
deleted file mode 100644
index 03dc65a0a..000000000
--- a/model_zoo/vision/nanodet_plus/cpp/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# 编译NanoDetPlus示例
-
-当前支持模型版本为：[NanoDetPlus v1.0.0-alpha-1](https://github.com/RangiLyu/nanodet/releases/tag/v1.0.0-alpha-1)
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 下载模型和图片
-wget https://github.com/RangiLyu/nanodet/releases/download/v1.0.0-alpha-1/nanodet-plus-m_320.onnx
-wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
-
-# 执行
-./nanodet_plus_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-5.710144,220.634033, 807.854370, 724.089111, 0.825635, 5
-45.646439,393.694061, 229.267044, 903.998413, 0.818263, 0
-218.289322,402.268829, 342.083252, 861.766479, 0.709301, 0
-698.587036,325.627197, 809.000000, 876.990967, 0.630235, 0
-```
diff --git a/model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc b/model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc
deleted file mode 100644
index b252bf6f8..000000000
--- a/model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::rangilyu::NanoDetPlus("nanodet-plus-m_320.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("bus.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/nanodet_plus/nanodet_plus.py b/model_zoo/vision/nanodet_plus/nanodet_plus.py
deleted file mode 100644
index 4101d2040..000000000
--- a/model_zoo/vision/nanodet_plus/nanodet_plus.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-model_url = "https://github.com/RangiLyu/nanodet/releases/download/v1.0.0-alpha-1/nanodet-plus-m_320.onnx"
-test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg"
-fd.download(model_url, ".", show_progress=True)
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.rangilyu.NanoDetPlus("nanodet-plus-m_320.onnx")
-
-# 预测图片
-im = cv2.imread("bus.jpg")
-result = model.predict(im, conf_threshold=0.35, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/ppseg/ppseg_unet.py b/model_zoo/vision/ppseg/ppseg_unet.py
deleted file mode 100644
index 750e2167b..000000000
--- a/model_zoo/vision/ppseg/ppseg_unet.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import fastdeploy as fd
-import cv2
-import tarfile
-
-# 下载模型和测试图片
-model_url = "https://github.com/felixhjh/Fastdeploy-Models/raw/main/unet_Cityscapes.tar.gz"
-test_jpg_url = "https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png"
-fd.download_and_decompress(model_url, ".")
-fd.download(test_jpg_url, ".", show_progress=True)
-# 加载模型
-model = fd.vision.ppseg.Model("./unet_Cityscapes/model.pdmodel",
-                              "./unet_Cityscapes/model.pdiparams",
-                              "./unet_Cityscapes/deploy.yaml")
-
-# 预测图片
-im = cv2.imread("./cityscapes_demo.png")
-result = model.predict(im)
-
-vis_im = im.copy()
-# 可视化结果
-fd.vision.visualize.vis_segmentation(im, result, vis_im)
-cv2.imwrite("vis_img.png", vis_im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/retinaface/README.md b/model_zoo/vision/retinaface/README.md
deleted file mode 100644
index 2b1902740..000000000
--- a/model_zoo/vision/retinaface/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# RetinaFace部署示例
-
-当前支持模型版本为：[RetinaFace CommitID:b984b4b](https://github.com/biubug6/Pytorch_Retinaface/commit/b984b4b)
-
-本文档说明如何进行[RetinaFace](https://github.com/biubug6/Pytorch_Retinaface)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp                     # C++ 代码目录
-│   ├── CMakeLists.txt      # C++ 代码编译CMakeLists文件
-│   ├── README.md           # C++ 代码编译部署文档
-│   └── retinaface.cc       # C++ 示例代码
-├── api.md                  # API 说明文档
-├── README.md               # RetinaFace 部署文档
-└── retinaface.py           # Python示例代码
-```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```bash
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## Python部署
-
-执行如下代码即会自动下载RetinaFace模型和测试图片
-```bash
-python retinaface.py
-```
-
-## 手动获取ONNX模型文件
-自动下载的模型文件是我们事先转换好的，如果您需要从RetinaFace官方repo导出ONNX，请参考以下步骤。  
-
-* 下载官方仓库并
-```bash
-git clone https://github.com/biubug6/Pytorch_Retinaface.git
-```
-* 下载预训练权重并放在weights文件夹
-```text
-./weights/
-      mobilenet0.25_Final.pth
-      mobilenetV1X0.25_pretrain.tar
-      Resnet50_Final.pth
-```
-* 运行convert_to_onnx.py导出ONNX模型文件
-```bash
-PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/mobilenet0.25_Final.pth --network mobile0.25 --long_side 640 --cpu
-PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/Resnet50_Final.pth --network resnet50 --long_side 640 --cpu
-```
-注意：需要先对convert_to_onnx.py脚本中的--long_side参数增加类型约束，type=int.
-* 使用onnxsim对模型进行简化
-```bash
-onnxsim FaceDetector.onnx Pytorch_RetinaFace_mobile0.25-640-640.onnx  # mobilenet
-onnxsim FaceDetector.onnx Pytorch_RetinaFace_resnet50-640-640.onnx  # resnet50
-```
-
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5]
-403.339783,254.192413, 490.002747, 351.931213, 0.999427, (425.657257,293.820740), (467.249451,293.667267), (446.830078,315.016388), (428.903381,326.129425), (465.764648,325.837341)
-296.834564,181.992035, 384.516876, 277.461243, 0.999194, (313.605164,224.800110), (352.888977,219.088043), (333.530182,239.872787), (325.395203,255.463852), (358.417175,250.529892)
-742.206238,263.547424, 840.871765, 366.171387, 0.999068, (762.715759,308.939880), (809.019653,304.544830), (786.174194,329.286163), (771.952271,341.376038), (812.717529,337.528839)
-545.351685,228.015930, 635.423584, 335.458649, 0.998681, (559.295654,269.971619), (598.439758,273.823608), (567.496643,292.894348), (558.160034,306.637238), (592.175781,309.493591)
-180.078125,241.787888, 257.213135, 320.321777, 0.998342, (203.702591,272.032715), (237.497726,271.356445), (222.380402,288.225708), (208.015259,301.360352), (233.943451,300.801636)
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [RetinaFace API文档](./api.md)
diff --git a/model_zoo/vision/retinaface/api.md b/model_zoo/vision/retinaface/api.md
deleted file mode 100644
index 47afddc87..000000000
--- a/model_zoo/vision/retinaface/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# RetinaFace API说明
-
-## Python API
-
-### RetinaFace类
-```
-fastdeploy.vision.biubug6.RetinaFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-RetinaFace模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`Pytorch_RetinaFace_mobile0.25-640-640.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> RetinaFace.predict(image_data, conf_threshold=0.7, nms_iou_threshold=0.3)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[retinaface.py](./retinaface.py)
-
-
-## C++ API
-
-### RetinaFace 类
-```
-fastdeploy::vision::biubug6::RetinaFace(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-RetinaFace模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`Pytorch_RetinaFace_mobile0.25-640-640.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> RetinaFace::Predict(cv::Mat* im, FaceDetectionResult* result,
->                     float conf_threshold = 0.7,
->                     float nms_iou_threshold = 0.3)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/retinaface.cc](cpp/retinaface.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/retinaface/cpp/CMakeLists.txt b/model_zoo/vision/retinaface/cpp/CMakeLists.txt
deleted file mode 100644
index 7ca567b82..000000000
--- a/model_zoo/vision/retinaface/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(retinaface_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(retinaface_demo ${PROJECT_SOURCE_DIR}/retinaface.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(retinaface_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/retinaface/cpp/README.md b/model_zoo/vision/retinaface/cpp/README.md
deleted file mode 100644
index ba400b570..000000000
--- a/model_zoo/vision/retinaface/cpp/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# 编译RetinaFace示例
-
-当前支持模型版本为：[RetinaFace CommitID:b984b4b](https://github.com/biubug6/Pytorch_Retinaface/commit/b984b4b)
-
-## 下载和解压预测库
-```bash
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-```
-
-## 编译示例代码
-```bash
-mkdir build & cd build
-cmake ..
-make -j
-```
-
-## 下载模型和图片
-wget https://github.com/DefTruth/Pytorch_Retinaface/releases/download/v0.1/Pytorch_RetinaFace_mobile0.25-640-640.onnx  
-wget https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg
-
-## 手动获取ONNX模型文件
-自动下载的模型文件是我们事先转换好的，如果您需要从RetinaFace官方repo导出ONNX，请参考以下步骤。  
-
-* 下载官方仓库并
-```bash
-git clone https://github.com/biubug6/Pytorch_Retinaface.git
-```
-* 下载预训练权重并放在weights文件夹
-```text
-./weights/
-      mobilenet0.25_Final.pth
-      mobilenetV1X0.25_pretrain.tar
-      Resnet50_Final.pth
-```
-* 运行convert_to_onnx.py导出ONNX模型文件
-```bash
-PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/mobilenet0.25_Final.pth --network mobile0.25 --long_side 640 --cpu
-PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/Resnet50_Final.pth --network resnet50 --long_side 640 --cpu
-```
-注意：需要先对convert_to_onnx.py脚本中的--long_side参数增加类型约束，type=int.
-* 使用onnxsim对模型进行简化
-```bash
-onnxsim FaceDetector.onnx Pytorch_RetinaFace_mobile0.25-640-640.onnx  # mobilenet
-onnxsim FaceDetector.onnx Pytorch_RetinaFace_resnet50-640-640.onnx  # resnet50
-```
-
-## 执行
-```bash
-./retinaface_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5]
-403.339783,254.192413, 490.002747, 351.931213, 0.999427, (425.657257,293.820740), (467.249451,293.667267), (446.830078,315.016388), (428.903381,326.129425), (465.764648,325.837341)
-296.834564,181.992035, 384.516876, 277.461243, 0.999194, (313.605164,224.800110), (352.888977,219.088043), (333.530182,239.872787), (325.395203,255.463852), (358.417175,250.529892)
-742.206238,263.547424, 840.871765, 366.171387, 0.999068, (762.715759,308.939880), (809.019653,304.544830), (786.174194,329.286163), (771.952271,341.376038), (812.717529,337.528839)
-545.351685,228.015930, 635.423584, 335.458649, 0.998681, (559.295654,269.971619), (598.439758,273.823608), (567.496643,292.894348), (558.160034,306.637238), (592.175781,309.493591)
-180.078125,241.787888, 257.213135, 320.321777, 0.998342, (203.702591,272.032715), (237.497726,271.356445), (222.380402,288.225708), (208.015259,301.360352), (233.943451,300.801636)
-```
diff --git a/model_zoo/vision/retinaface/cpp/retinaface.cc b/model_zoo/vision/retinaface/cpp/retinaface.cc
deleted file mode 100644
index 933b629c4..000000000
--- a/model_zoo/vision/retinaface/cpp/retinaface.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  auto model =
-      vis::biubug6::RetinaFace("Pytorch_RetinaFace_mobile0.25-640-640.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread("3.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res, 0.7f, 0.3f)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite("vis_result.jpg", vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/model_zoo/vision/retinaface/retinaface.py b/model_zoo/vision/retinaface/retinaface.py
deleted file mode 100644
index 0c5bd4e33..000000000
--- a/model_zoo/vision/retinaface/retinaface.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型
-model_url = "https://github.com/DefTruth/Pytorch_Retinaface/releases/download/v0.1/Pytorch_RetinaFace_mobile0.25-640-640.onnx"
-test_img_url = "https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg"
-fd.download(model_url, ".", show_progress=True)
-fd.download(test_img_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.biubug6.RetinaFace(
-    "Pytorch_RetinaFace_mobile0.25-640-640.onnx")
-
-# 预测图片
-im = cv2.imread("3.jpg")
-result = model.predict(im, conf_threshold=0.7, nms_iou_threshold=0.3)
-
-# 可视化结果
-vis_im = fd.vision.visualize.vis_face_detection(im, result)
-cv2.imwrite("vis_result.jpg", vis_im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/scaledyolov4/README.md b/model_zoo/vision/scaledyolov4/README.md
deleted file mode 100644
index 93d3bd6c1..000000000
--- a/model_zoo/vision/scaledyolov4/README.md
+++ /dev/null
@@ -1,66 +0,0 @@
-# 编译ScaledYOLOv4示例
-
-当前支持模型版本为：[ScaledYOLOv4 branch yolov4-large](https://github.com/WongKinYiu/ScaledYOLOv4)
-
-本文档说明如何进行[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp
-│   ├── CMakeLists.txt
-│   ├── README.md
-│   └── scaledyolov4.cc
-├── README.md
-└── scaled_yolov4.py
-```
-
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)官方github库，按照指引下载安装，下载`scaledyolov4.pt` 模型，利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现问题，可以参考[ScaledYOLOv4#401](https://github.com/WongKinYiu/ScaledYOLOv4/issues/401)的解决办法
-
-  ```
-  #下载ScaledYOLOv4模型文件
-  Download from the goole drive https://drive.google.com/file/d/1aXZZE999sHMP1gev60XhNChtHPRMH3Fz/view?usp=sharing
-
-  # 导出onnx格式文件
-  python models/export.py  --weights PATH/TO/scaledyolov4-xx.pt --img-size 640
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/scaledyolov4.onnx PATH/TO/model_zoo/vision/scaledyolov4/
-  ```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-## Python部署
-
-执行如下代码即会自动下载测试图片
-```
-python scaled_yolov4.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-665.666321,390.477173, 810.000000, 879.829346, 0.940627, 0
-48.266064,396.217163, 247.338425, 901.974915, 0.922277, 0
-221.351868,408.446259, 345.524017, 857.927917, 0.910516, 0
-14.989746,228.662842, 801.292236, 735.677490, 0.820487, 5
-0.000000,548.260864, 75.825439, 873.932495, 0.718777, 0
-134.789062,473.950195, 148.526367, 506.777344, 0.513963, 27
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [ScaledYOLOv4 API文档](./api.md)
diff --git a/model_zoo/vision/scaledyolov4/api.md b/model_zoo/vision/scaledyolov4/api.md
deleted file mode 100644
index e23559229..000000000
--- a/model_zoo/vision/scaledyolov4/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# ScaledYOLOv4 API说明
-
-## Python API
-
-### ScaledYOLOv4类
-```
-fastdeploy.vision.wongkinyiu.ScaledYOLOv4(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-ScaledYOLOv4模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`scaledyolov4.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> ScaledYOLOv4.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[scaled_yolov4.py](./scaled_yolov4.py)
-
-
-## C++ API
-
-### ScaledYOLOv4类
-```
-fastdeploy::vision::wongkinyiu::ScaledYOLOv4(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-ScaledYOLOv4模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`scaledyolov4.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result,
->                       float conf_threshold = 0.25,
->                       float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/scaledyolov4.cc](cpp/scaledyolov4.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt b/model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt
deleted file mode 100644
index 062f4fa5d..000000000
--- a/model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(scaledyolov4_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(scaledyolov4_demo ${PROJECT_SOURCE_DIR}/scaledyolov4.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(scaledyolov4_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/scaledyolov4/cpp/README.md b/model_zoo/vision/scaledyolov4/cpp/README.md
deleted file mode 100644
index 7372cc8b9..000000000
--- a/model_zoo/vision/scaledyolov4/cpp/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# 编译ScaledYOLOv4示例
-
-当前支持模型版本为：[ScaledYOLOv4 branch yolov4-large](https://github.com/WongKinYiu/ScaledYOLOv4)
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)官方github库，按照指引下载安装，下载`scaledyolov4.pt` 模型，利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现问题，可以参考[ScaledYOLOv4#401](https://github.com/WongKinYiu/ScaledYOLOv4/issues/401)的解决办法
-
-  ```
-  #下载ScaledYOLOv4模型文件
-  Download from the goole drive https://drive.google.com/file/d/1aXZZE999sHMP1gev60XhNChtHPRMH3Fz/view?usp=sharing
-
-  # 导出onnx格式文件
-  python models/export.py  --weights PATH/TO/scaledyolov4-xx-xx-xx.pt --img-size 640
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/scaledyolov4.onnx PATH/TO/model_zoo/vision/scaledyolov4/
-  ```
-
-
-## 运行demo
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 移动onnx文件到demo目录
-cp PATH/TO/scaledyolov4.onnx PATH/TO/model_zoo/vision/scaledyolov4/cpp/build/
-
-# 下载图片
-wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
-
-# 执行
-./scaledyolov4_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-665.666321,390.477173, 810.000000, 879.829346, 0.940627, 0
-48.266064,396.217163, 247.338425, 901.974915, 0.922277, 0
-221.351868,408.446259, 345.524017, 857.927917, 0.910516, 0
-14.989746,228.662842, 801.292236, 735.677490, 0.820487, 5
-0.000000,548.260864, 75.825439, 873.932495, 0.718777, 0
-134.789062,473.950195, 148.526367, 506.777344, 0.513963, 27
-```
diff --git a/model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc b/model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc
deleted file mode 100644
index 13f9bc0c2..000000000
--- a/model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::wongkinyiu::ScaledYOLOv4("scaledyolov4.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("bus.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/scaledyolov4/scaled_yolov4.py b/model_zoo/vision/scaledyolov4/scaled_yolov4.py
deleted file mode 100644
index 3bcf0fa58..000000000
--- a/model_zoo/vision/scaledyolov4/scaled_yolov4.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg"
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.wongkinyiu.ScaledYOLOv4("scaledyolov4.onnx")
-
-# 预测图片
-im = cv2.imread("bus.jpg")
-result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/scrfd/README.md b/model_zoo/vision/scrfd/README.md
deleted file mode 100644
index 4424f59a3..000000000
--- a/model_zoo/vision/scrfd/README.md
+++ /dev/null
@@ -1,92 +0,0 @@
-# 编译SCRFD示例
-
-当前支持模型版本为：[SCRFD CID:17cdeab](https://github.com/deepinsight/insightface/tree/17cdeab12a35efcebc2660453a8cbeae96e20950)
-
-本文档说明如何进行[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp
-│   ├── CMakeLists.txt
-│   ├── README.md
-│   └── scrfd.cc
-├── README.md
-└── scrfd.py
-```
-
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)官方github库，按照指引下载安装，下载`scrfd.pt` 模型，利用 `tools/scrfd2onnx.py` 得到`onnx`格式文件。
-
-
-
-  ```
-  #下载scrfd模型文件
-  e.g. download from  https://onedrive.live.com/?authkey=%21ABbFJx2JMhNjhNA&id=4A83B6B633B029CC%215542&cid=4A83B6B633B029CC
-
-  # 安装官方库配置环境，此版本导出环境为：
-  - 手动配置环境
-    torch==1.8.0
-    mmcv==1.3.5
-    mmdet==2.7.0
-
-  - 通过docker配置
-    docker pull qyjdefdocker/onnx-scrfd-converter:v0.3
-
-  # 导出onnx格式文件
-  - 手动生成
-    python tools/scrfd2onnx.py configs/scrfd/scrfd_500m.py weights/scrfd_500m.pth --shape 640 --input-img face-xxx.jpg
-
-  - docker
-    docker的onnx目录中已有生成好的onnx文件
-
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/SCRFD.onnx PATH/TO/model_zoo/vision/scrfd/
-  ```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-## Python部署
-
-执行如下代码即会自动下载测试图片
-```
-python scrfd.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score]
-437.670410,194.262772, 478.729828, 244.633911, 0.912465
-418.303650,118.277687, 455.877838, 169.209564, 0.911748
-269.449493,280.810608, 319.466614, 342.681213, 0.908530
-775.553955,237.509979, 814.626526, 286.252350, 0.901296
-565.155945,303.849670, 608.786255, 356.025726, 0.898307
-411.813477,296.117584, 454.560394, 353.151367, 0.889968
-688.620239,153.063812, 728.825195, 204.860321, 0.888146
-686.523071,304.881104, 732.901245, 364.715088, 0.885789
-194.658829,236.657883, 234.194748, 289.099701, 0.881143
-137.273422,286.025787, 183.479523, 344.614441, 0.877399
-289.256775,148.388992, 326.087769, 197.035645, 0.875090
-182.943939,154.105682, 221.422440, 204.460495, 0.871119
-330.301849,207.786499, 367.546692, 260.813232, 0.869559
-659.884216,254.861847, 701.580017, 307.984711, 0.869249
-550.305359,232.336868, 591.702026, 281.101532, 0.866158
-567.473511,127.402367, 604.959839, 175.831696, 0.858938
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [SCRFD API文档](./api.md)
diff --git a/model_zoo/vision/scrfd/api.md b/model_zoo/vision/scrfd/api.md
deleted file mode 100644
index 442bd4a25..000000000
--- a/model_zoo/vision/scrfd/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# SCRFD API说明
-
-## Python API
-
-### SCRFD类
-```
-fastdeploy.vision.deepinsight.SCRFD(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-SCRFD模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`SCRFD.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> SCRFD.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[scrfd.py](./scrfd.py)
-
-
-## C++ API
-
-### SCRFD类
-```
-fastdeploy::vision::deepinsight::SCRFD(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-SCRFD模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`SCRFD.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> SCRFD::Predict(cv::Mat* im, FaceDetectionResult* result,
->                float conf_threshold = 0.25,
->                float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/scrfd.cc](cpp/scrfd.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/scrfd/cpp/CMakeLists.txt b/model_zoo/vision/scrfd/cpp/CMakeLists.txt
deleted file mode 100644
index e63971ba1..000000000
--- a/model_zoo/vision/scrfd/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(scrfd_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(scrfd_demo ${PROJECT_SOURCE_DIR}/scrfd.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(scrfd_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/scrfd/cpp/README.md b/model_zoo/vision/scrfd/cpp/README.md
deleted file mode 100644
index fe2ee64d3..000000000
--- a/model_zoo/vision/scrfd/cpp/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# 编译SCRFD示例
-
-当前支持模型版本为：[SCRFD CID:17cdeab](https://github.com/deepinsight/insightface/tree/17cdeab12a35efcebc2660453a8cbeae96e20950)
-
-本文档说明如何进行[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)的快速部署推理。本目录结构如下
-
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)官方github库，按照指引下载安装，下载`scrfd.pt` 模型，利用 `tools/scrfd2onnx.py` 得到`onnx`格式文件。
-
-
-  ```
-  #下载scrfd模型文件
-  e.g. download from  https://onedrive.live.com/?authkey=%21ABbFJx2JMhNjhNA&id=4A83B6B633B029CC%215542&cid=4A83B6B633B029CC
-
-  # 安装官方库配置环境，此版本导出环境为：
-  - 手动配置环境
-    torch==1.8.0
-    mmcv==1.3.5
-    mmdet==2.7.0
-
-  - 通过docker配置
-    docker pull qyjdefdocker/onnx-scrfd-converter:v0.3
-
-  # 导出onnx格式文件
-  - 手动生成
-    python tools/scrfd2onnx.py configs/scrfd/scrfd_500m.py weights/scrfd_500m.pth --shape 640 --input-img face-xxx.jpg
-
-  - docker
-    docker的onnx目录中已有生成好的onnx文件
-
-
-## 运行demo
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 移动onnx文件到demo目录
-cp PATH/TO/SCRFD.onnx PATH/TO/model_zoo/vision/scrfd/cpp/build/
-
-# 下载图片
-wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg
-
-# 执行
-./scrfd_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score]
-437.670410,194.262772, 478.729828, 244.633911, 0.912465
-418.303650,118.277687, 455.877838, 169.209564, 0.911748
-269.449493,280.810608, 319.466614, 342.681213, 0.908530
-775.553955,237.509979, 814.626526, 286.252350, 0.901296
-565.155945,303.849670, 608.786255, 356.025726, 0.898307
-411.813477,296.117584, 454.560394, 353.151367, 0.889968
-688.620239,153.063812, 728.825195, 204.860321, 0.888146
-686.523071,304.881104, 732.901245, 364.715088, 0.885789
-194.658829,236.657883, 234.194748, 289.099701, 0.881143
-137.273422,286.025787, 183.479523, 344.614441, 0.877399
-289.256775,148.388992, 326.087769, 197.035645, 0.875090
-182.943939,154.105682, 221.422440, 204.460495, 0.871119
-330.301849,207.786499, 367.546692, 260.813232, 0.869559
-659.884216,254.861847, 701.580017, 307.984711, 0.869249
-550.305359,232.336868, 591.702026, 281.101532, 0.866158
-567.473511,127.402367, 604.959839, 175.831696, 0.858938
-```
diff --git a/model_zoo/vision/scrfd/cpp/scrfd.cc b/model_zoo/vision/scrfd/cpp/scrfd.cc
deleted file mode 100644
index 72dbeb4c7..000000000
--- a/model_zoo/vision/scrfd/cpp/scrfd.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::deepinsight::SCRFD("SCRFD.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("test_lite_face_detector_3.jpg");
-  cv::Mat vis_im = im.clone();
-
-  // 如果导入不带有关键点预测的模型，请修改模型参数 use_kps 和 landmarks_per_face，示例如下
-  // model.landmarks_per_face = 0;
-  // model.use_kps = false;
-
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/scrfd/scrfd.py b/model_zoo/vision/scrfd/scrfd.py
deleted file mode 100644
index 1d4ae8c76..000000000
--- a/model_zoo/vision/scrfd/scrfd.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-test_jpg_url = "https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg"
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.deepinsight.SCRFD("SCRFD.onnx")
-
-# 如果导入不带有关键点预测的模型，请修改模型参数 use_kps 和 landmarks_per_face，示例如下
-# model.use_kps = False
-# model.landmarks_per_face = 0
-
-# 预测图片
-im = cv2.imread("test_lite_face_detector_3.jpg")
-result = model.predict(im, conf_threshold=0.5, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_face_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/ultraface/README.md b/model_zoo/vision/ultraface/README.md
deleted file mode 100644
index 264f1b5cb..000000000
--- a/model_zoo/vision/ultraface/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# UltraFace部署示例
-
-当前支持模型版本为：[UltraFace CommitID:dffdddd](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/commit/dffdddd)
-
-本文档说明如何进行[UltraFace](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp                     # C++ 代码目录
-│   ├── CMakeLists.txt      # C++ 代码编译CMakeLists文件
-│   ├── README.md           # C++ 代码编译部署文档
-│   └── ultraface.cc        # C++ 示例代码
-├── api.md                  # API 说明文档
-├── README.md               # UltraFace 部署文档
-└── ultraface.py            # Python示例代码
-```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```bash
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## Python部署
-
-执行如下代码即会自动下载UltraFace模型和测试图片
-```bash
-python ultraface.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score]
-742.528931,261.309937, 837.749146, 365.145599, 0.999833
-408.159332,253.410889, 484.747284, 353.378052, 0.999832
-549.409424,225.051819, 636.311890, 337.824707, 0.999782
-185.562805,233.364044, 252.001801, 323.948669, 0.999709
-304.065918,180.468140, 377.097961, 278.932861, 0.999645
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [UltraFace API文档](./api.md)
diff --git a/model_zoo/vision/ultraface/api.md b/model_zoo/vision/ultraface/api.md
deleted file mode 100644
index 8dc7d2fb7..000000000
--- a/model_zoo/vision/ultraface/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# UltraFace API说明
-
-## Python API
-
-### UltraFace类
-```
-fastdeploy.vision.linzaer.UltraFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-UltraFace模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`version-RFB-320.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> UltraFace.predict(image_data, conf_threshold=0.7, nms_iou_threshold=0.3)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[ultraface.py](./ultraface.py)
-
-
-## C++ API
-
-### UltraFace类
-```
-fastdeploy::vision::linzaer::UltraFace(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-UltraFace模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`version-RFB-320.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> UltraFace::Predict(cv::Mat* im, FaceDetectionResult* result,
->                    float conf_threshold = 0.7,
->                    float nms_iou_threshold = 0.3)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/ultraface.cc](cpp/ultraface.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/ultraface/cpp/CMakeLists.txt b/model_zoo/vision/ultraface/cpp/CMakeLists.txt
deleted file mode 100644
index a33967dee..000000000
--- a/model_zoo/vision/ultraface/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(ultraface_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(ultraface_demo ${PROJECT_SOURCE_DIR}/ultraface.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(ultraface_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/ultraface/cpp/README.md b/model_zoo/vision/ultraface/cpp/README.md
deleted file mode 100644
index d2098d838..000000000
--- a/model_zoo/vision/ultraface/cpp/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# 编译UltraFace示例
-
-当前支持模型版本为：[UltraFace CommitID:dffdddd](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/commit/dffdddd)
-
-## 下载和解压预测库
-```bash
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-```
-
-## 编译示例代码
-```bash
-mkdir build & cd build
-cmake ..
-make -j
-```
-
-## 下载模型和图片
-wget https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/models/onnx/version-RFB-320.onnx  
-wget https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg
-
-
-## 执行
-```bash
-./ultraface_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score]
-742.528931,261.309937, 837.749146, 365.145599, 0.999833
-408.159332,253.410889, 484.747284, 353.378052, 0.999832
-549.409424,225.051819, 636.311890, 337.824707, 0.999782
-185.562805,233.364044, 252.001801, 323.948669, 0.999709
-304.065918,180.468140, 377.097961, 278.932861, 0.999645
-```
diff --git a/model_zoo/vision/ultraface/cpp/ultraface.cc b/model_zoo/vision/ultraface/cpp/ultraface.cc
deleted file mode 100644
index 9f1aa8a9b..000000000
--- a/model_zoo/vision/ultraface/cpp/ultraface.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-
-  auto model = vis::linzaer::UltraFace("version-RFB-320.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed! Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "Init Done! Model:" << model_file << std::endl;
-  }
-  model.EnableDebug();
-
-  cv::Mat im = cv::imread("3.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res, 0.7f, 0.3f)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  } else {
-    std::cout << "Prediction Done!" << std::endl;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite("vis_result.jpg", vis_im);
-  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
-  return 0;
-}
diff --git a/model_zoo/vision/ultraface/ultraface.py b/model_zoo/vision/ultraface/ultraface.py
deleted file mode 100644
index ceb4c313f..000000000
--- a/model_zoo/vision/ultraface/ultraface.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型
-model_url = "https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/models/onnx/version-RFB-320.onnx"
-test_img_url = "https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg"
-fd.download(model_url, ".", show_progress=True)
-fd.download(test_img_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.linzaer.UltraFace("version-RFB-320.onnx")
-
-# 预测图片
-im = cv2.imread("3.jpg")
-result = model.predict(im, conf_threshold=0.7, nms_iou_threshold=0.3)
-
-# 可视化结果
-fd.vision.visualize.vis_face_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/yolor/README.md b/model_zoo/vision/yolor/README.md
deleted file mode 100644
index 358e62bbe..000000000
--- a/model_zoo/vision/yolor/README.md
+++ /dev/null
@@ -1,66 +0,0 @@
-# 编译YOLOR示例
-
-当前支持模型版本为：[YOLOR weights](https://github.com/WongKinYiu/yolor/releases/tag/weights)
-(tips: 如果使用 `git clone` 的方式下载仓库代码，请将分支切换(checkout)到 `paper` 分支).
-
-本文档说明如何进行[YOLOR](https://github.com/WongKinYiu/yolor)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp
-│   ├── CMakeLists.txt
-│   ├── README.md
-│   └── yolor.cc
-├── README.md
-└── yolor.py
-```
-
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[YOLOR](https://github.com/WongKinYiu/yolor)官方github库，按照指引下载安装，下载`yolor.pt` 模型，利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现精度不达标或者是数据维度的问题，可以参考[yolor#32](https://github.com/WongKinYiu/yolor/issues/32)的解决办法
-
-  ```
-  #下载yolor模型文件
-  wget https://github.com/WongKinYiu/yolor/releases/download/weights/yolor-d6-paper-570.pt
-
-  # 导出onnx格式文件
-  python models/export.py  --weights PATH/TO/yolor-xx-xx-xx.pt --img-size 640
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/yolor.onnx PATH/TO/model_zoo/vision/yolor/
-  ```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-## Python部署
-
-执行如下代码即会自动下载测试图片
-```
-python yolor.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-0.000000,185.201431, 315.673126, 410.071594, 0.959289, 17
-433.802826,211.603455, 595.489319, 346.425537, 0.952615, 17
-230.446854,195.618805, 418.365479, 362.712128, 0.884253, 17
-336.545624,208.555618, 457.704315, 323.543152, 0.788450, 17
-0.896423,183.936996, 154.788727, 304.916412, 0.672804, 17
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [YOLOR API文档](./api.md)
diff --git a/model_zoo/vision/yolor/api.md b/model_zoo/vision/yolor/api.md
deleted file mode 100644
index b1e5be889..000000000
--- a/model_zoo/vision/yolor/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# YOLOR API说明
-
-## Python API
-
-### YOLOR类
-```
-fastdeploy.vision.wongkinyiu.YOLOR(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-YOLOR模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolor.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> YOLOR.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[yolor.py](./yolor.py)
-
-
-## C++ API
-
-### YOLOR类
-```
-fastdeploy::vision::wongkinyiu::YOLOR(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-YOLOR模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolor.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> YOLOR::Predict(cv::Mat* im, DetectionResult* result,
->                 float conf_threshold = 0.25,
->                 float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/yolor.cc](cpp/yolor.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/yolor/cpp/CMakeLists.txt b/model_zoo/vision/yolor/cpp/CMakeLists.txt
deleted file mode 100644
index 18248b845..000000000
--- a/model_zoo/vision/yolor/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(yolor_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(yolor_demo ${PROJECT_SOURCE_DIR}/yolor.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(yolor_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/yolor/cpp/README.md b/model_zoo/vision/yolor/cpp/README.md
deleted file mode 100644
index d06bbe300..000000000
--- a/model_zoo/vision/yolor/cpp/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# 编译YOLOR示例
-
-当前支持模型版本为：[YOLOR weights](https://github.com/WongKinYiu/yolor/releases/tag/weights)
-(tips: 如果使用 `git clone` 的方式下载仓库代码，请将分支切换(checkout)到 `paper` 分支).
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[YOLOR](https://github.com/WongKinYiu/yolor)官方github库，按照指引下载安装，下载`yolor.pt` 模型，利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现精度不达标或者是数据维度的问题，可以参考[yolor#32](https://github.com/WongKinYiu/yolor/issues/32)的解决办法
-
-  ```
-  #下载yolor模型文件
-  wget https://github.com/WongKinYiu/yolor/releases/download/weights/yolor-d6-paper-570.pt
-
-  # 导出onnx格式文件
-  python models/export.py  --weights PATH/TO/yolor-xx-xx-xx.pt --img-size 640
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/yolor.onnx PATH/TO/model_zoo/vision/yolor/
-  ```
-
-
-## 运行demo
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 移动onnx文件到demo目录
-cp PATH/TO/yolor.onnx PATH/TO/model_zoo/vision/yolor/cpp/build/
-
-# 下载图片
-wget https://raw.githubusercontent.com/WongKinYiu/yolor/paper/inference/images/horses.jpg
-
-# 执行
-./yolor_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-0.000000,185.201431, 315.673126, 410.071594, 0.959289, 17
-433.802826,211.603455, 595.489319, 346.425537, 0.952615, 17
-230.446854,195.618805, 418.365479, 362.712128, 0.884253, 17
-336.545624,208.555618, 457.704315, 323.543152, 0.788450, 17
-0.896423,183.936996, 154.788727, 304.916412, 0.672804, 17
-```
diff --git a/model_zoo/vision/yolor/cpp/yolor.cc b/model_zoo/vision/yolor/cpp/yolor.cc
deleted file mode 100644
index db194583f..000000000
--- a/model_zoo/vision/yolor/cpp/yolor.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::wongkinyiu::YOLOR("yolor.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("horses.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/yolor/yolor.py b/model_zoo/vision/yolor/yolor.py
deleted file mode 100644
index 56d3f9689..000000000
--- a/model_zoo/vision/yolor/yolor.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-test_jpg_url = "https://raw.githubusercontent.com/WongKinYiu/yolor/paper/inference/images/horses.jpg"
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.wongkinyiu.YOLOR("yolor.onnx")
-
-# 预测图片
-im = cv2.imread("horses.jpg")
-result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/yolov5/README.md b/model_zoo/vision/yolov5/README.md
deleted file mode 100644
index 03b19d44c..000000000
--- a/model_zoo/vision/yolov5/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# YOLOv5部署示例
-
-当前支持模型版本为：[YOLOv5 v6.0](https://github.com/ultralytics/yolov5/releases/download/v6.0)
-
-本文档说明如何进行[YOLOv5](https://github.com/ultralytics/yolov5)的快速部署推理。本目录结构如下
-```
-.
-├── cpp                 # C++ 代码目录
-│   ├── CMakeLists.txt  # C++ 代码编译CMakeLists文件
-│   ├── README.md       # C++ 代码编译部署文档
-│   └── yolov5.cc       # C++ 示例代码
-├── README.md           # YOLOv5 部署文档
-└── yolov5.py           # Python示例代码
-```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## Python部署
-
-执行如下代码即会自动下载YOLOv5模型和测试图片
-```
-python yolov5.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-223.395142,403.948669, 345.337189, 867.339050, 0.856906, 0
-668.301758,400.781342, 808.441772, 882.534973, 0.829716, 0
-50.210720,398.571411, 243.123367, 905.016602, 0.805375, 0
-23.768242,214.979370, 802.627686, 778.840881, 0.756311, 5
-0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [YOLOv5 API文档](./api.md)
diff --git a/model_zoo/vision/yolov5/api.md b/model_zoo/vision/yolov5/api.md
deleted file mode 100644
index 66d6acdc7..000000000
--- a/model_zoo/vision/yolov5/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# YOLOv5 API说明
-
-## Python API
-
-### YOLOv5类
-```
-fastdeploy.vision.ultralytics.YOLOv5(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-YOLOv5模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolov5s.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> YOLOv5.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[yolov5.py](./yolov5.py)
-
-
-## C++ API
-
-### YOLOv5类
-```
-fastdeploy::vision::ultralytics::YOLOv5(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-YOLOv5模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolov5s.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> YOLOv5::Predict(cv::Mat* im, DetectionResult* result,
->                 float conf_threshold = 0.25,
->                 float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/yolov5.cc](cpp/yolov5.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/yolov5/cpp/CMakeLists.txt b/model_zoo/vision/yolov5/cpp/CMakeLists.txt
deleted file mode 100644
index c1f82a6fe..000000000
--- a/model_zoo/vision/yolov5/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-PROJECT(yolov5_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR  /fastdeploy/CustomOp/FastDeploy/build1/fastdeploy-linux-x64-gpu-0.3.0)
-
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(yolov5_demo ${PROJECT_SOURCE_DIR}/yolov5.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(yolov5_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/yolov5/cpp/README.md b/model_zoo/vision/yolov5/cpp/README.md
deleted file mode 100644
index a1f1bde49..000000000
--- a/model_zoo/vision/yolov5/cpp/README.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# 编译YOLOv5示例
-
-当前支持模型版本为：[YOLOv5 v6.0](https://github.com/ultralytics/yolov5/releases/download/v6.0)
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 下载模型和图片
-wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
-wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
-
-# 执行
-./yolov5_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-223.395142,403.948669, 345.337189, 867.339050, 0.856906, 0
-668.301758,400.781342, 808.441772, 882.534973, 0.829716, 0
-50.210720,398.571411, 243.123367, 905.016602, 0.805375, 0
-23.768242,214.979370, 802.627686, 778.840881, 0.756311, 5
-0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
-```
diff --git a/model_zoo/vision/yolov5/cpp/yolov5.cc b/model_zoo/vision/yolov5/cpp/yolov5.cc
deleted file mode 100644
index dddcee843..000000000
--- a/model_zoo/vision/yolov5/cpp/yolov5.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::ultralytics::YOLOv5("yolov5s.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("bus.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/yolov5/yolov5.py b/model_zoo/vision/yolov5/yolov5.py
deleted file mode 100644
index c502c6636..000000000
--- a/model_zoo/vision/yolov5/yolov5.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-model_url = "https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx"
-test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg"
-fd.download(model_url, ".", show_progress=True)
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
-
-# 预测图片
-im = cv2.imread("bus.jpg")
-result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/yolov5face/README.md b/model_zoo/vision/yolov5face/README.md
deleted file mode 100644
index e1713e67d..000000000
--- a/model_zoo/vision/yolov5face/README.md
+++ /dev/null
@@ -1,78 +0,0 @@
-# YOLOv5Face部署示例
-
-当前支持模型版本为：[YOLOv5Face CommitID:4fd1ead](https://github.com/deepcam-cn/yolov5-face/commit/4fd1ead)
-
-本文档说明如何进行[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp                     # C++ 代码目录
-│   ├── CMakeLists.txt      # C++ 代码编译CMakeLists文件
-│   ├── README.md           # C++ 代码编译部署文档
-│   └── yolov5face.cc       # C++ 示例代码
-├── api.md                  # API 说明文档
-├── README.md               # YOLOv5Face 部署文档
-└── yolov5face.py           # Python示例代码
-```
-
-## 获取ONNX文件
-
-访问[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)官方github库，按照指引下载安装，下载`yolov5s-face.pt` 模型，利用 `export.py` 得到`onnx`格式文件。
-
-* 下载yolov5face模型文件
-  ```
-  Link: https://pan.baidu.com/s/1fyzLxZYx7Ja1_PCIWRhxbw Link: eq0q  
-  https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing
-  ```
-
-* 导出onnx格式文件
-  ```bash
-  PYTHONPATH=. python export.py --weights weights/yolov5s-face.pt --img_size 640 640 --batch_size 1  
-  ```
-* onnx模型简化(可选)
-  ```bash
-  onnxsim yolov5s-face.onnx yolov5s-face.onnx
-  ```
-* 移动onnx文件到model_zoo/yolov5face的目录
-  ```bash
-  cp PATH/TO/yolov5s-face.onnx PATH/TO/model_zoo/vision/yolov5face/
-  ```
-
-
-
-## 准备测试图片
-准备一张包含人脸的测试图片，命名为test.jpg，并拷贝到可执行文件所在的目录
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```bash
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## Python部署
-
-执行如下代码即会自动下载YOLOv5Face模型和测试图片
-```bash
-python yolov5face.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5]
-749.575256,375.122162, 775.008850, 407.858215, 0.851824, (756.933838,388.423157), (767.810974,387.932922), (762.617065,394.212341), (758.053101,399.073639), (767.370300,398.769470)
-897.833862,380.372864, 924.725281, 409.566803, 0.847505, (903.757202,390.221741), (914.575867,389.495911), (908.998901,395.983307), (905.803223,400.871429), (914.674438,400.268066)
-281.558197,367.739349, 305.474701, 397.860535, 0.840915, (287.018768,379.771088), (297.285004,378.755280), (292.057831,385.207367), (289.110962,390.010437), (297.535339,389.412048)
-132.922104,368.507263, 159.098541, 402.777283, 0.840232, (140.632492,382.361633), (151.900864,380.966156), (146.869186,388.505066), (141.930420,393.724670), (151.734604,392.808197)
-699.379700,306.743256, 723.219421, 336.533295, 0.840228, (705.688843,319.133301), (715.784668,318.449524), (711.107300,324.416016), (707.236633,328.671936), (716.088623,328.151794)
-# ...
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [YOLOv5Face API文档](./api.md)
diff --git a/model_zoo/vision/yolov5face/api.md b/model_zoo/vision/yolov5face/api.md
deleted file mode 100644
index ea32820f6..000000000
--- a/model_zoo/vision/yolov5face/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# YOLOv5Face API说明
-
-## Python API
-
-### YOLOv5Face类
-```
-fastdeploy.vision.deepcam.YOLOv5Face(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-YOLOv5Face模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolov5s-face.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> YOLOv5Face.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[yolov5face.py](./yolov5face.py)
-
-
-## C++ API
-
-### YOLOv5Face类
-```
-fastdeploy::vision::deepcam::YOLOv5Face(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-YOLOv5Face模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolov5s-face.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> YOLOv5Face::Predict(cv::Mat* im, FaceDetectionResult* result,
->                     float conf_threshold = 0.25,
->                     float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/yolov5face.cc](cpp/yolov5face.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/yolov5face/cpp/CMakeLists.txt b/model_zoo/vision/yolov5face/cpp/CMakeLists.txt
deleted file mode 100644
index 23878ac2c..000000000
--- a/model_zoo/vision/yolov5face/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(yolov5face_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(yolov5face_demo ${PROJECT_SOURCE_DIR}/yolov5face.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(yolov5face_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/yolov5face/cpp/README.md b/model_zoo/vision/yolov5face/cpp/README.md
deleted file mode 100644
index 60d46cb87..000000000
--- a/model_zoo/vision/yolov5face/cpp/README.md
+++ /dev/null
@@ -1,60 +0,0 @@
-# 编译YOLOv5Face示例
-
-当前支持模型版本为：[YOLOv5Face CommitID:4fd1ead](https://github.com/deepcam-cn/yolov5-face/commit/4fd1ead)
-
-## 下载和解压预测库
-```bash
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-```
-
-## 编译示例代码
-```bash
-mkdir build & cd build
-cmake ..
-make -j
-```
-
-## 获取ONNX文件
-
-访问[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)官方github库，按照指引下载安装，下载`yolov5s-face.pt` 模型，利用 `export.py` 得到`onnx`格式文件。
-
-* 下载yolov5face模型文件
-  ```
-  Link: https://pan.baidu.com/s/1fyzLxZYx7Ja1_PCIWRhxbw Link: eq0q  
-  https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing
-  ```
-
-* 导出onnx格式文件
-  ```bash
-  PYTHONPATH=. python export.py --weights weights/yolov5s-face.pt --img_size 640 640 --batch_size 1  
-  ```
-* onnx模型简化(可选)
-  ```bash
-  onnxsim yolov5s-face.onnx yolov5s-face.onnx
-  ```
-* 移动onnx文件到可执行文件的目录
-  ```bash
-  cp PATH/TO/yolov5s-face.onnx PATH/TO/model_zoo/vision/yolov5face/cpp/build
-  ```
-
-
-
-## 准备测试图片
-准备一张包含人脸的测试图片，命名为test.jpg，并拷贝到可执行文件所在的目录
-
-## 执行
-```bash
-./yolov5face_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5]
-749.575256,375.122162, 775.008850, 407.858215, 0.851824, (756.933838,388.423157), (767.810974,387.932922), (762.617065,394.212341), (758.053101,399.073639), (767.370300,398.769470)
-897.833862,380.372864, 924.725281, 409.566803, 0.847505, (903.757202,390.221741), (914.575867,389.495911), (908.998901,395.983307), (905.803223,400.871429), (914.674438,400.268066)
-281.558197,367.739349, 305.474701, 397.860535, 0.840915, (287.018768,379.771088), (297.285004,378.755280), (292.057831,385.207367), (289.110962,390.010437), (297.535339,389.412048)
-132.922104,368.507263, 159.098541, 402.777283, 0.840232, (140.632492,382.361633), (151.900864,380.966156), (146.869186,388.505066), (141.930420,393.724670), (151.734604,392.808197)
-699.379700,306.743256, 723.219421, 336.533295, 0.840228, (705.688843,319.133301), (715.784668,318.449524), (711.107300,324.416016), (707.236633,328.671936), (716.088623,328.151794)
-# ...
-```
diff --git a/model_zoo/vision/yolov5face/cpp/yolov5face.cc b/model_zoo/vision/yolov5face/cpp/yolov5face.cc
deleted file mode 100644
index baa0bb7c0..000000000
--- a/model_zoo/vision/yolov5face/cpp/yolov5face.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::deepcam::YOLOv5Face("yolov5s-face.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("test.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::FaceDetectionResult res;
-  if (!model.Predict(&im, &res, 0.1f, 0.3f)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/yolov5face/yolov5face.py b/model_zoo/vision/yolov5face/yolov5face.py
deleted file mode 100644
index ff7ab1b77..000000000
--- a/model_zoo/vision/yolov5face/yolov5face.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 加载模型
-model = fd.vision.deepcam.YOLOv5Face("yolov5s-face.onnx")
-
-# 预测图片
-im = cv2.imread("test.jpg")
-result = model.predict(im, conf_threshold=0.1, nms_iou_threshold=0.3)
-
-# 可视化结果
-fd.vision.visualize.vis_face_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/yolov5lite/README.md b/model_zoo/vision/yolov5lite/README.md
deleted file mode 100644
index 22c726e85..000000000
--- a/model_zoo/vision/yolov5lite/README.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# 编译YOLOv5Lite示例
-
-当前支持模型版本为：[YOLOv5-Lite-v1.4](https://github.com/ppogg/YOLOv5-Lite/releases/tag/v1.4)
-
-本文档说明如何进行[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp
-│   ├── CMakeLists.txt
-│   ├── README.md
-│   └── yolov5lite.cc
-├── README.md
-└── yolov5lite.py
-```
-
-## 获取ONNX文件
-- 自动获取
-  访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite)
-官方github库，按照指引下载安装，下载`yolov5-lite-xx.onnx` 模型(Tips：官方提供的ONNX文件目前是没有decode模块的)
-  ```
-  #下载yolov5-lite模型文件(.onnx)
-  Download from https://drive.google.com/file/d/1bJByk9eoS6pv8Z3N4bcLRCV3i7uk24aU/view
-  官方Repo也支持百度云下载
-  ```
-
-- 手动获取
-
-  访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite)
-官方github库，按照指引下载安装，下载`yolov5-lite-xx.pt` 模型，利用 `export.py` 得到`onnx`格式文件。
-
-  - 导出含有decode模块的ONNX文件
-
-  首先需要参考[YOLOv5-Lite#189](https://github.com/ppogg/YOLOv5-Lite/pull/189)的解决办法，修改代码。
-
-  ```
-  #下载yolov5-lite模型文件(.pt)
-  Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view
-  官方Repo也支持百度云下载
-
-  # 导出onnx格式文件
-  python export.py --grid --dynamic --concat --weights PATH/TO/yolov5-lite-xx.pt
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/
-  ```
-  - 导出无decode模块的ONNX文件(不需要修改代码)
-
-  ```
-  #下载yolov5-lite模型文件
-  Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view
-  官方Repo也支持百度云下载
-
-  # 导出onnx格式文件
-  python export.py --grid --dynamic --weights PATH/TO/yolov5-lite-xx.pt
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/
-  ```
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## 设置ONNX文件处理方式
-
-如果ONNX文件是含有decode模块的，设置`model.is_decode_exported = True`(解除yolov5lite.py第12行注释)
-
-如果ONNX文件是无decode模块的，不用做任何处理，默认是`model.is_decode_exported = False`
-
-## Python部署
-
-执行如下代码即会自动下载测试图片
-```
-python yolov5lite.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-1289.729126,698.414612, 1404.110229, 1023.949524, 0.893141, 0
-300.958649,1027.166992, 449.921753, 1299.823608, 0.887509, 0
-627.481201,823.830750, 718.942078, 1133.402344, 0.885308, 0
-152.969437,1147.352905, 257.228424, 1301.652710, 0.877009, 0
-512.867188,773.371094, 649.768494, 1123.529785, 0.870583, 0
-906.801147,508.160278, 997.325867, 825.934509, 0.867381, 0
-307.480988,87.785973, 408.681732, 387.337463, 0.860646, 0
-783.116821,492.420319, 871.741028, 774.283691, 0.851574, 0
-1347.626343,190.911758, 1452.582031, 459.044617, 0.837095, 0
-3.035009,3.509769, 97.237442, 257.884094, 0.835671, 0
-261.199738,303.971527, 371.036041, 569.222595, 0.834187, 0
-1170.358032,722.587219, 1284.564087, 1036.034302, 0.833685, 0
-660.728333,476.764618, 760.990723, 783.636414, 0.823469, 0
-777.628906,815.975098, 886.895935, 1115.206421, 0.820669, 0
-415.902740,983.790283, 543.582764, 1300.361206, 0.791539, 0
-132.273209,40.751694, 210.614563, 285.128174, 0.790815, 0
-1331.930664,370.903687, 1446.262573, 638.119202, 0.773755, 0
-1254.425293,31.073910, 1352.297241, 312.583282, 0.743923, 0
-915.965088,310.556458, 1031.921265, 624.672302, 0.696823, 0
-499.573517,362.165588, 595.503296, 624.872070, 0.678821, 0
-956.890747,76.389160, 1068.599609, 340.183533, 0.656648, 0
-452.388977,320.288269, 532.330688, 593.987915, 0.652459, 0
-488.305664,1028.187012, 565.136719, 1179.688477, 0.629574, 24
-855.175781,868.482422, 916.516113, 988.196777, 0.555574, 26
-1321.689453,1.638852, 1384.584961, 99.413322, 0.504122, 0
-845.324707,531.824768, 875.939941, 614.515198, 0.472173, 26
-1342.546387,2.096432, 1420.351929, 98.888016, 0.463313, 0
-990.747070,635.389221, 1018.249512, 695.264709, 0.444000, 26
-956.799316,120.643112, 1015.100098, 242.920944, 0.442043, 26
-560.449219,401.270538, 607.763672, 522.486389, 0.434484, 26
-1329.199219,372.522980, 1443.054199, 635.315979, 0.399014, 26
-956.140137,88.526413, 1047.509766, 305.213409, 0.367863, 26
-1379.296875,852.808594, 1406.909180, 916.456055, 0.366000, 26
-1331.909180,468.433624, 1369.299316, 532.044495, 0.352329, 26
-864.880371,915.723633, 916.223145, 990.979980, 0.325205, 26
-260.778809,341.724640, 322.229004, 442.432648, 0.320724, 24
-1271.154785,77.393600, 1336.230469, 186.194870, 0.307823, 26
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [YOLOv5Lite API文档](./api.md)
diff --git a/model_zoo/vision/yolov5lite/api.md b/model_zoo/vision/yolov5lite/api.md
deleted file mode 100644
index 38cd87725..000000000
--- a/model_zoo/vision/yolov5lite/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# YOLOv5Lite API说明
-
-## Python API
-
-### YOLOv5Lite类
-```
-fastdeploy.vision.ppogg.YOLOv5Lite(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-YOLOv5Lite模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolov5lite.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> YOLOv5Lite.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[yolov5_lite.py](./yolov5_lite.py)
-
-
-## C++ API
-
-### YOLOv5Lite类
-```
-fastdeploy::vision::ppogg::YOLOv5Lite(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-YOLOv5Lite模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolov5lite.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> YOLOv5Lite::Predict(cv::Mat* im, DetectionResult* result,
->                     float conf_threshold = 0.25,
->                     float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/yolov5lite.cc](cpp/yolov5lite.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/yolov5lite/cpp/CMakeLists.txt b/model_zoo/vision/yolov5lite/cpp/CMakeLists.txt
deleted file mode 100644
index 855076a08..000000000
--- a/model_zoo/vision/yolov5lite/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(yolov5lite_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(yolov5lite_demo ${PROJECT_SOURCE_DIR}/yolov5lite.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(yolov5lite_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/yolov5lite/cpp/README.md b/model_zoo/vision/yolov5lite/cpp/README.md
deleted file mode 100644
index 495fb8145..000000000
--- a/model_zoo/vision/yolov5lite/cpp/README.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# 编译YOLOv5Lite示例
-
-当前支持模型版本为：[YOLOv5-Lite-v1.4](https://github.com/ppogg/YOLOv5-Lite/releases/tag/v1.4)
-
-## 获取ONNX文件
-- 自动获取
-  访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite)
-官方github库，按照指引下载安装，下载`yolov5-lite-xx.onnx` 模型(Tips：官方提供的ONNX文件目前是没有decode模块的)
-  ```
-  #下载yolov5-lite模型文件(.onnx)
-  Download from https://drive.google.com/file/d/1bJByk9eoS6pv8Z3N4bcLRCV3i7uk24aU/view
-  官方Repo也支持百度云下载
-  ```
-
-- 手动获取
-
-  访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite)
-官方github库，按照指引下载安装，下载`yolov5-lite-xx.pt` 模型，利用 `export.py` 得到`onnx`格式文件。
-
-  - 导出含有decode模块的ONNX文件
-
-  首先需要参考[YOLOv5-Lite#189](https://github.com/ppogg/YOLOv5-Lite/pull/189)的解决办法，修改代码。
-
-  ```
-  #下载yolov5-lite模型文件(.pt)
-  Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view
-  官方Repo也支持百度云下载
-
-  # 导出onnx格式文件
-  python export.py --grid --dynamic --concat --weights PATH/TO/yolov5-lite-xx.pt
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/
-  ```
-  - 导出无decode模块的ONNX文件(不需要修改代码)
-
-  ```
-  #下载yolov5-lite模型文件
-  Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view
-  官方Repo也支持百度云下载
-
-  # 导出onnx格式文件
-  python export.py --grid --dynamic --weights PATH/TO/yolov5-lite-xx.pt
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/
-  ```
-
-## 设置ONNX文件处理方式
-
-如果ONNX文件是含有decode模块的，设置`model.is_decode_exported = true`(解除yolov5lite.cc第27行注释)
-
-如果ONNX文件是无decode模块的，不用做任何处理，默认是`model.is_decode_exported = false`
-
-## 运行demo
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 移动onnx文件到demo目录
-cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/cpp/build/
-
-# 下载图片
-wget https://raw.githubusercontent.com/ppogg/YOLOv5-Lite/master/cpp_demo/mnn/test.jpg
-
-# 执行
-./yolov5lite_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-1289.729126,698.414612, 1404.110229, 1023.949524, 0.893141, 0
-300.958649,1027.166992, 449.921753, 1299.823608, 0.887509, 0
-627.481201,823.830750, 718.942078, 1133.402344, 0.885308, 0
-152.969437,1147.352905, 257.228424, 1301.652710, 0.877009, 0
-512.867188,773.371094, 649.768494, 1123.529785, 0.870583, 0
-906.801147,508.160278, 997.325867, 825.934509, 0.867381, 0
-307.480988,87.785973, 408.681732, 387.337463, 0.860646, 0
-783.116821,492.420319, 871.741028, 774.283691, 0.851574, 0
-1347.626343,190.911758, 1452.582031, 459.044617, 0.837095, 0
-3.035009,3.509769, 97.237442, 257.884094, 0.835671, 0
-261.199738,303.971527, 371.036041, 569.222595, 0.834187, 0
-1170.358032,722.587219, 1284.564087, 1036.034302, 0.833685, 0
-660.728333,476.764618, 760.990723, 783.636414, 0.823469, 0
-777.628906,815.975098, 886.895935, 1115.206421, 0.820669, 0
-415.902740,983.790283, 543.582764, 1300.361206, 0.791539, 0
-132.273209,40.751694, 210.614563, 285.128174, 0.790815, 0
-1331.930664,370.903687, 1446.262573, 638.119202, 0.773755, 0
-1254.425293,31.073910, 1352.297241, 312.583282, 0.743923, 0
-915.965088,310.556458, 1031.921265, 624.672302, 0.696823, 0
-499.573517,362.165588, 595.503296, 624.872070, 0.678821, 0
-956.890747,76.389160, 1068.599609, 340.183533, 0.656648, 0
-452.388977,320.288269, 532.330688, 593.987915, 0.652459, 0
-488.305664,1028.187012, 565.136719, 1179.688477, 0.629574, 24
-855.175781,868.482422, 916.516113, 988.196777, 0.555574, 26
-1321.689453,1.638852, 1384.584961, 99.413322, 0.504122, 0
-845.324707,531.824768, 875.939941, 614.515198, 0.472173, 26
-1342.546387,2.096432, 1420.351929, 98.888016, 0.463313, 0
-990.747070,635.389221, 1018.249512, 695.264709, 0.444000, 26
-956.799316,120.643112, 1015.100098, 242.920944, 0.442043, 26
-560.449219,401.270538, 607.763672, 522.486389, 0.434484, 26
-1329.199219,372.522980, 1443.054199, 635.315979, 0.399014, 26
-956.140137,88.526413, 1047.509766, 305.213409, 0.367863, 26
-1379.296875,852.808594, 1406.909180, 916.456055, 0.366000, 26
-1331.909180,468.433624, 1369.299316, 532.044495, 0.352329, 26
-864.880371,915.723633, 916.223145, 990.979980, 0.325205, 26
-260.778809,341.724640, 322.229004, 442.432648, 0.320724, 24
-1271.154785,77.393600, 1336.230469, 186.194870, 0.307823, 26
-```
diff --git a/model_zoo/vision/yolov5lite/cpp/yolov5lite.cc b/model_zoo/vision/yolov5lite/cpp/yolov5lite.cc
deleted file mode 100644
index 206143f52..000000000
--- a/model_zoo/vision/yolov5lite/cpp/yolov5lite.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::ppogg::YOLOv5Lite("yolov5lite.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("test.jpg");
-  cv::Mat vis_im = im.clone();
-  // 如果onnx是有decode模块的，需要修改参数
-  // model.is_decode_exported = true;
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/yolov5lite/yolov5lite.py b/model_zoo/vision/yolov5lite/yolov5lite.py
deleted file mode 100644
index 683db22f9..000000000
--- a/model_zoo/vision/yolov5lite/yolov5lite.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-test_jpg_url = "https://raw.githubusercontent.com/ppogg/YOLOv5-Lite/master/cpp_demo/mnn/test.jpg"
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.ppogg.YOLOv5Lite("yolov5lite.onnx")
-
-# 如果onnx是有decode模块的，需要修改参数
-# model.is_decode_exported = True
-
-# 预测图片
-im = cv2.imread("test.jpg")
-result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/yolov6/README.md b/model_zoo/vision/yolov6/README.md
deleted file mode 100644
index accc6bdbb..000000000
--- a/model_zoo/vision/yolov6/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# YOLOv6部署示例
-
-当前支持模型版本为：[YOLOv6 v0.1.0](https://github.com/meituan/YOLOv6/releases/download/0.1.0)
-
-本文档说明如何进行[YOLOv6](https://github.com/meituan/YOLOv6)的快速部署推理。本目录结构如下
-```
-.
-├── cpp                 # C++ 代码目录
-│   ├── CMakeLists.txt  # C++ 代码编译CMakeLists文件
-│   ├── README.md       # C++ 代码编译部署文档
-│   └── yolov6.cc       # C++ 示例代码
-├── README.md           # YOLOv6 部署文档
-└── yolov6.py           # Python示例代码
-```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## Python部署
-
-执行如下代码即会自动下载YOLOv6模型和测试图片
-```
-python yolov6.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-11.772949,229.269287, 792.933838, 748.294189, 0.954794, 5
-667.140381,396.185455, 807.701721, 881.810120, 0.900997, 0
-223.271011,405.105743, 345.740723, 859.328552, 0.898938, 0
-50.135777,405.863129, 245.485519, 904.153809, 0.888936, 0
-0.000000,549.002869, 77.864723, 869.455017, 0.614145, 0
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [YOLOv6 API文档](./api.md)
diff --git a/model_zoo/vision/yolov6/api.md b/model_zoo/vision/yolov6/api.md
deleted file mode 100644
index eca89f06a..000000000
--- a/model_zoo/vision/yolov6/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# YOLOv6 API说明
-
-## Python API
-
-### YOLOv6类
-```
-fastdeploy.vision.meituan.YOLOv6(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-YOLOv6模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolov6s.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> YOLOv6.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[yolov6.py](./yolov6.py)
-
-
-## C++ API
-
-### YOLOv6类
-```
-fastdeploy::vision::meituan::YOLOv6(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-YOLOv6模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolov6s.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> YOLOv6::Predict(cv::Mat* im, DetectionResult* result,
->                 float conf_threshold = 0.25,
->                 float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/yolov6.cc](cpp/yolov6.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/yolov6/cpp/CMakeLists.txt b/model_zoo/vision/yolov6/cpp/CMakeLists.txt
deleted file mode 100644
index 28987f7f7..000000000
--- a/model_zoo/vision/yolov6/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(yolov6_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(yolov6_demo ${PROJECT_SOURCE_DIR}/yolov6.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(yolov6_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/yolov6/cpp/README.md b/model_zoo/vision/yolov6/cpp/README.md
deleted file mode 100644
index 0e2c03dbf..000000000
--- a/model_zoo/vision/yolov6/cpp/README.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# 编译YOLOv6示例
-
-当前支持模型版本为：[YOLOv6 v0.1.0](https://github.com/meituan/YOLOv6/releases/download/0.1.0)
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 下载模型和图片
-wget https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6s.onnx
-wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
-
-# 执行
-./yolov6_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-11.772949,229.269287, 792.933838, 748.294189, 0.954794, 5
-667.140381,396.185455, 807.701721, 881.810120, 0.900997, 0
-223.271011,405.105743, 345.740723, 859.328552, 0.898938, 0
-50.135777,405.863129, 245.485519, 904.153809, 0.888936, 0
-0.000000,549.002869, 77.864723, 869.455017, 0.614145, 0
-```
diff --git a/model_zoo/vision/yolov6/cpp/yolov6.cc b/model_zoo/vision/yolov6/cpp/yolov6.cc
deleted file mode 100644
index 62d2fa0be..000000000
--- a/model_zoo/vision/yolov6/cpp/yolov6.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::meituan::YOLOv6("yolov6s.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("bus.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/yolov6/yolov6.py b/model_zoo/vision/yolov6/yolov6.py
deleted file mode 100644
index fa8aca074..000000000
--- a/model_zoo/vision/yolov6/yolov6.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import fastdeploy as fd
-import cv2 
-
-# 下载模型和测试图片
-model_url = "https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6s.onnx"
-test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg"
-fd.download(model_url, ".", show_progress=True)
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.meituan.YOLOv6("yolov6s.onnx")
-
-# 预测图片
-im = cv2.imread("bus.jpg")
-result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/yolov7/README.md b/model_zoo/vision/yolov7/README.md
deleted file mode 100644
index a7165a045..000000000
--- a/model_zoo/vision/yolov7/README.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# 编译YOLOv7示例
-
-当前支持模型版本为：[YOLOv7 v0.1](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1)
-
-本文档说明如何进行[YOLOv7](https://github.com/WongKinYiu/yolov7)的快速部署推理。本目录结构如下
-
-```
-.
-├── cpp
-│   ├── CMakeLists.txt
-│   ├── README.md
-│   └── yolov7.cc
-├── README.md
-└── yolov7.py
-```
-
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[YOLOv7](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1)官方github库，按照指引下载安装，下载`yolov7.pt` 模型，利用 `models/export.py` 得到`onnx`格式文件。
-
-
-
-  ```
-  #下载yolov7模型文件
-  wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
-
-  # 导出onnx格式文件 (Tips: 对应 YOLOv7 release v0.1 代码)
-  python models/export.py --grid --dynamic --weights PATH/TO/yolov7.pt
-
-  # 如果您的代码版本中有支持NMS的ONNX文件导出，请使用如下命令导出ONNX文件(请暂时不要使用 "--end2end"，我们后续将支持带有NMS的ONNX模型的部署)
-  python export.py --grid --dynamic --weights PATH/TO/yolov7.pt
-
-  # 移动onnx文件到demo目录
-  cp PATH/TO/yolov7.onnx PATH/TO/model_zoo/vision/yolov7/
-  ```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-## Python部署
-
-执行如下代码即会自动下载测试图片
-```
-python yolov7.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-0.056616,191.221619, 314.871063, 409.948914, 0.955449, 17
-432.547852,211.914841, 594.904297, 346.708618, 0.942706, 17
-0.000000,185.456207, 153.967789, 286.157562, 0.860487, 17
-224.049210,195.147003, 419.658234, 364.004852, 0.798262, 17
-369.316986,209.055725, 456.373840, 321.627625, 0.687066, 17
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [YOLOv7 API文档](./api.md)
diff --git a/model_zoo/vision/yolov7/api.md b/model_zoo/vision/yolov7/api.md
deleted file mode 100644
index abd2abdce..000000000
--- a/model_zoo/vision/yolov7/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# YOLOv7 API说明
-
-## Python API
-
-### YOLOv7类
-```
-fastdeploy.vision.wongkinyiu.YOLOv7(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-YOLOv7模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolov7.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> YOLOv7.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[yolov7.py](./yolov7.py)
-
-
-## C++ API
-
-### YOLOv7类
-```
-fastdeploy::vision::wongkinyiu::YOLOv7(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-YOLOv7模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolov7.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> YOLOv7::Predict(cv::Mat* im, DetectionResult* result,
->                 float conf_threshold = 0.25,
->                 float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/yolov7.cc](cpp/yolov7.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/yolov7/cpp/CMakeLists.txt b/model_zoo/vision/yolov7/cpp/CMakeLists.txt
deleted file mode 100644
index ec7c86d02..000000000
--- a/model_zoo/vision/yolov7/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(yolov7_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(yolov7_demo ${PROJECT_SOURCE_DIR}/yolov7.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(yolov7_demo ${FASTDEPLOY_LIBS})
\ No newline at end of file
diff --git a/model_zoo/vision/yolov7/cpp/README.md b/model_zoo/vision/yolov7/cpp/README.md
deleted file mode 100644
index 6190b3ae7..000000000
--- a/model_zoo/vision/yolov7/cpp/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# 编译YOLOv7示例
-
-当前支持模型版本为：[YOLOv7 v0.1](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1)
-
-## 获取ONNX文件
-
-- 手动获取
-
-  访问[YOLOv7](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1)官方github库，按照指引下载安装，下载`yolov7.pt` 模型，利用 `models/export.py` 得到`onnx`格式文件。
-
-  ```
-  #下载yolov7模型文件
-  wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
-
-  # 导出onnx格式文件 (Tips: 对应 YOLOv7 release v0.1 代码)
-  python models/export.py --grid --dynamic --weights PATH/TO/yolov7.pt
-
-  # 如果您的代码版本中有支持NMS的ONNX文件导出，请使用如下命令导出ONNX文件(请暂时不要使用 "--end2end"，我们后续将支持带有NMS的ONNX模型的部署)
-  python export.py --grid --dynamic --weights PATH/TO/yolov7.pt
-  ```
-
-
-## 运行demo
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 移动onnx文件到demo目录
-cp PATH/TO/yolov7.onnx PATH/TO/model_zoo/vision/yolov7/cpp/build/
-
-# 下载图片
-wget https://raw.githubusercontent.com/WongKinYiu/yolov7/main/inference/images/horses.jpg
-
-# 执行
-./yolov7_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-0.056616,191.221619, 314.871063, 409.948914, 0.955449, 17
-432.547852,211.914841, 594.904297, 346.708618, 0.942706, 17
-0.000000,185.456207, 153.967789, 286.157562, 0.860487, 17
-224.049210,195.147003, 419.658234, 364.004852, 0.798262, 17
-369.316986,209.055725, 456.373840, 321.627625, 0.687066, 17
-```
diff --git a/model_zoo/vision/yolov7/cpp/yolov7.cc b/model_zoo/vision/yolov7/cpp/yolov7.cc
deleted file mode 100644
index 8b41c0288..000000000
--- a/model_zoo/vision/yolov7/cpp/yolov7.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::wongkinyiu::YOLOv7("yolov7.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("horses.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/yolov7/yolov7.py b/model_zoo/vision/yolov7/yolov7.py
deleted file mode 100644
index cef467622..000000000
--- a/model_zoo/vision/yolov7/yolov7.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-test_jpg_url = "https://raw.githubusercontent.com/WongKinYiu/yolov7/main/inference/images/horses.jpg"
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.wongkinyiu.YOLOv7("yolov7.onnx")
-
-# 预测图片
-im = cv2.imread("horses.jpg")
-result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
-print(model.runtime_option)
diff --git a/model_zoo/vision/yolox/README.md b/model_zoo/vision/yolox/README.md
deleted file mode 100644
index d64a2f0ff..000000000
--- a/model_zoo/vision/yolox/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# YOLOX部署示例
-
-当前支持模型版本为：[YOLOX v0.1.1](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0)
-
-本文档说明如何进行[YOLOX](https://github.com/Megvii-BaseDetection/YOLOX)的快速部署推理。本目录结构如下
-```
-.
-├── cpp                 # C++ 代码目录
-│   ├── CMakeLists.txt  # C++ 代码编译CMakeLists文件
-│   ├── README.md       # C++ 代码编译部署文档
-│   └── yolox.cc        # C++ 示例代码
-├── README.md           # YOLOX 部署文档
-└── yolox.py            # Python示例代码
-```
-
-## 安装FastDeploy
-
-使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
-```
-# 安装fastdeploy-python工具
-pip install fastdeploy-python
-
-# 安装vision-cpu模块
-fastdeploy install vision-cpu
-```
-
-## Python部署
-
-执行如下代码即会自动下载YOLOX模型和测试图片
-```
-python yolox.py
-```
-
-执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-17.151855,225.294434, 805.329712, 735.578613, 0.940478, 5
-671.162109,387.403961, 809.000000, 879.525513, 0.909566, 0
-54.373432,400.188110, 204.652756, 893.662537, 0.894507, 0
-221.339310,406.614960, 347.045593, 857.299927, 0.887144, 0
-0.083759,554.987305, 61.894527, 881.098816, 0.450202, 0
-```
-
-## 其它文档
-
-- [C++部署](./cpp/README.md)
-- [YOLOX API文档](./api.md)
diff --git a/model_zoo/vision/yolox/api.md b/model_zoo/vision/yolox/api.md
deleted file mode 100644
index c7a6f254b..000000000
--- a/model_zoo/vision/yolox/api.md
+++ /dev/null
@@ -1,71 +0,0 @@
-# YOLOX API说明
-
-## Python API
-
-### YOLOX类
-```
-fastdeploy.vision.megvii.YOLOX(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
-```
-YOLOX模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolox_s.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### predict函数
-> ```
-> YOLOX.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
-> ```
-> 模型预测结口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
-> > * **conf_threshold**(float): 检测框置信度过滤阈值
-> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
-
-示例代码参考[yolox.py](./yolox.py)
-
-
-## C++ API
-
-### YOLOX类
-```
-fastdeploy::vision::megvii::YOLOX(
-        const string& model_file,
-        const string& params_file = "",
-        const RuntimeOption& runtime_option = RuntimeOption(),
-        const Frontend& model_format = Frontend::ONNX)
-```
-YOLOX模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolox_s.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
-
-**参数**
-
-> * **model_file**(str): 模型文件路径
-> * **params_file**(str): 参数文件路径
-> * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
-> * **model_format**(Frontend): 模型格式
-
-#### Predict函数
-> ```
-> YOLOX::Predict(cv::Mat* im, DetectionResult* result,
->                float conf_threshold = 0.25,
->                float nms_iou_threshold = 0.5)
-> ```
-> 模型预测接口，输入图像直接输出检测结果。
->
-> **参数**
->
-> > * **im**: 输入图像，注意需为HWC，BGR格式
-> > * **result**: 检测结果，包括检测框，各个框的置信度
-> > * **conf_threshold**: 检测框置信度过滤阈值
-> > * **nms_iou_threshold**: NMS处理过程中iou阈值
-
-示例代码参考[cpp/yolox.cc](cpp/yolox.cc)
-
-## 其它API使用
-
-- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
diff --git a/model_zoo/vision/yolox/cpp/CMakeLists.txt b/model_zoo/vision/yolox/cpp/CMakeLists.txt
deleted file mode 100644
index 67bf0f2da..000000000
--- a/model_zoo/vision/yolox/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PROJECT(yolox_demo C CXX)
-CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
-
-# 在低版本ABI环境中，通过如下代码进行兼容性编译
-# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-
-# 指定下载解压后的fastdeploy库路径
-set(FASTDEPLOY_INSTALL_DIR /fastdeploy/CustomOp/FastDeploy/build1/fastdeploy-linux-x64-gpu-0.3.0)
-
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-
-# 添加FastDeploy依赖头文件
-include_directories(${FASTDEPLOY_INCS})
-
-add_executable(yolox_demo ${PROJECT_SOURCE_DIR}/yolox.cc)
-# 添加FastDeploy库依赖
-target_link_libraries(yolox_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/yolox/cpp/README.md b/model_zoo/vision/yolox/cpp/README.md
deleted file mode 100644
index cc48878f6..000000000
--- a/model_zoo/vision/yolox/cpp/README.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# 编译YOLOX示例
-
-当前支持模型版本为：[YOLOX v0.1.1](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0)
-
-```
-# 下载和解压预测库
-wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
-tar xvf fastdeploy-linux-x64-0.0.3.tgz
-
-# 编译示例代码
-mkdir build & cd build
-cmake ..
-make -j
-
-# 下载模型和图片
-wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx
-wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
-
-# 执行
-./yolox_demo
-```
-
-执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
-```
-DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
-17.151855,225.294434, 805.329712, 735.578613, 0.940478, 5
-671.162109,387.403961, 809.000000, 879.525513, 0.909566, 0
-54.373432,400.188110, 204.652756, 893.662537, 0.894507, 0
-221.339310,406.614960, 347.045593, 857.299927, 0.887144, 0
-0.083759,554.987305, 61.894527, 881.098816, 0.450202, 0
-```
diff --git a/model_zoo/vision/yolox/cpp/yolox.cc b/model_zoo/vision/yolox/cpp/yolox.cc
deleted file mode 100644
index 934a50bea..000000000
--- a/model_zoo/vision/yolox/cpp/yolox.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "fastdeploy/vision.h"
-
-int main() {
-  namespace vis = fastdeploy::vision;
-  auto model = vis::megvii::YOLOX("yolox_s.onnx");
-  if (!model.Initialized()) {
-    std::cerr << "Init Failed." << std::endl;
-    return -1;
-  }
-  cv::Mat im = cv::imread("bus.jpg");
-  cv::Mat vis_im = im.clone();
-
-  vis::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Prediction Failed." << std::endl;
-    return -1;
-  }
-
-  // 输出预测框结果
-  std::cout << res.Str() << std::endl;
-
-  // 可视化预测结果
-  vis::Visualize::VisDetection(&vis_im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  return 0;
-}
diff --git a/model_zoo/vision/yolox/yolox.py b/model_zoo/vision/yolox/yolox.py
deleted file mode 100644
index b63675049..000000000
--- a/model_zoo/vision/yolox/yolox.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import fastdeploy as fd
-import cv2
-
-# 下载模型和测试图片
-model_url = "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx"
-test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg"
-fd.download(model_url, ".", show_progress=True)
-fd.download(test_jpg_url, ".", show_progress=True)
-
-# 加载模型
-model = fd.vision.megvii.YOLOX("yolox_s.onnx")
-
-# 预测图片
-im = cv2.imread("bus.jpg")
-result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
-
-# 可视化结果
-fd.vision.visualize.vis_detection(im, result)
-cv2.imwrite("vis_result.jpg", im)
-
-# 输出预测结果
-print(result)
diff --git a/sdk_mannager/fastdeploy/__init__.py b/sdk_mannager/fastdeploy/__init__.py
deleted file mode 100644
index ff1cb0c72..000000000
--- a/sdk_mannager/fastdeploy/__init__.py
+++ /dev/null
@@ -1,230 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from six import text_type as _text_type
-import logging
-import argparse
-
-# Since the source code is not fully open sourced,
-# currently we will provide the prebuilt library
-# and demo codes
-import os
-
-__version__ = "0.1.1"
-
-
-def is_installed(package_name):
-    import pkg_resources
-    try:
-        pkg_resources.get_distribution(package_name)
-    except pkg_resources.DistributionNotFound:
-        return False
-    return True
-
-
-def get_version(package_name):
-    if not is_installed(package_name):
-        raise Exception("{} is not installed.".format(package_name))
-    major = pkg_resources.get_distribution(package).parsed_version.major
-    micro = pkg_resources.get_distribution(package).parsed_version.micro
-    minor = pkg_resources.get_distribution(package).parsed_version.minor
-    return major, micro, minor
-
-
-def parse_arguments():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--model',
-        type=_text_type,
-        default=None,
-        help='Name of model, which can be listed by --list_models')
-    parser.add_argument(
-        '--platform',
-        type=_text_type,
-        default=None,
-        help='Define platform, supports Windows/Linux/Android/iOS.')
-    parser.add_argument(
-        '--soc',
-        type=_text_type,
-        default=None,
-        help='Define soc for the platform, supports x86/x86-NVIDIA_GPU/ARM/jetson.'
-    )
-    parser.add_argument(
-        '--save_dir',
-        type=_text_type,
-        default=".",
-        help='Path to download and extract deployment SDK.')
-    parser.add_argument(
-        '--list_models',
-        required=False,
-        action="store_true",
-        default=False,
-        help='List all the supported models.')
-    parser.add_argument(
-        '--download_sdk',
-        required=False,
-        action="store_true",
-        default=False,
-        help='List all the supported models.')
-
-    return parser.parse_args()
-
-
-def read_sources():
-    from .download import download, download_and_decompress
-    user_dir = os.path.expanduser('~')
-    print("Updating the newest sdk information...")
-    source_cfgs = "https://bj.bcebos.com/paddlehub/fastdeploy/fastdeploy_newest_sources.cfg.1"
-    if os.path.exists(
-            os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")):
-        os.remove(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1"))
-    download(source_cfgs, user_dir)
-    categories = dict()
-    res = dict()
-    with open(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")) as f:
-        for line in f:
-            if line.strip().startswith("#"):
-                continue
-            if line.strip() == "":
-                continue
-            category, model, plat, soc, url = line.strip().split('\t')
-            if category not in categories:
-                categories[category] = set()
-            categories[category].add(model)
-            if model not in res:
-                res[model] = dict()
-            if plat not in res[model]:
-                res[model][plat] = dict()
-            if soc not in res[model][plat]:
-                res[model][plat][soc] = dict()
-            res[model][plat][soc] = url
-    return categories, res
-
-
-def main():
-    from .download import download, download_and_decompress
-
-    args = parse_arguments()
-
-    if not args.list_models and not args.download_sdk:
-        print(
-            "Please use flag --list_models to show all the supported models, or use flag --download_sdk to download the specify SDK to deploy you model."
-        )
-        return
-
-    categories, all_sources = read_sources()
-    all_models = list(all_sources.keys())
-    all_models.sort()
-
-    if args.list_models:
-        print("Currently, FastDeploy supports {} models, list as below,\n".
-              format(len(all_models)))
-
-        for k, v in categories.items():
-            print("\nModel Category: {}".format(k))
-            print("_" * 100)
-            models = list(categories[k])
-            models.sort()
-            i = 0
-            while i < len(models):
-                if i == len(models) - 1:
-                    print(models[i].center(30))
-                    i += 1
-                elif i == len(models) - 2:
-                    print(models[i].center(30), models[i + 1].center(30))
-                    i += 2
-                else:
-                    print(models[i].center(30), models[i + 1].center(30),
-                          models[i + 2].center(30))
-                    i += 3
-        return
-
-    if not os.path.exists(args.save_dir):
-        print("The specified save_dir: {} is not exist.".format(args.save_dir))
-        return
-
-    if args.model is None or args.model == "":
-        print(
-            "Please define --model to choose which kind of model to deploy, use --list_models to show all the supported models."
-        )
-        return
-
-    if args.model not in all_sources:
-        print(
-            "{} is not supported, use --list_models to list all the models FastDeploy supported.".
-            format(args.model))
-        return
-
-    if args.platform is None or args.platform == "":
-        print(
-            "Please define --platform to choose which platform to deploy, supports windows/linux/android/ios."
-        )
-        return
-
-    if args.platform not in all_sources[args.model]:
-        print(
-            "The model:{} only supports platform of {}, {} is not supported now.".
-            format(args.model,
-                   list(all_sources[args.model].keys()), args.platform))
-        return
-
-    if args.soc is None or args.soc == "":
-        print(
-            "Please define --soc to choose which hardware to deploy, for model:{} and platform:{}, the available socs are {}.".
-            format(args.model, args.platform,
-                   list(all_sources[args.model][args.platform].keys())))
-        return
-
-    if args.soc not in all_sources[args.model][args.platform]:
-        print(
-            "The model:{} in platform:{} only supports soc of {}, {} is not supported now.".
-            format(args.model, args.platform,
-                   list(all_sources[args.model][args.platform].keys()),
-                   args.soc))
-        return
-
-    print("\nDownloading SDK:",
-          all_sources[args.model][args.platform][args.soc])
-
-    save_dir = args.save_dir
-    sdk_name = os.path.split(all_sources[args.model][args.platform][args.soc])[
-        -1].strip()
-    if all_sources[args.model][args.platform][args.soc].count(".zip") > 0:
-        sdk_name = os.path.split(all_sources[args.model][args.platform][
-            args.soc])[-1].strip().split(".zip")[0]
-        new_save_dir = os.path.join(args.save_dir, sdk_name)
-        if not os.path.exists(new_save_dir):
-            os.mkdir(new_save_dir)
-        save_dir = new_save_dir
-    download_and_decompress(
-        all_sources[args.model][args.platform][args.soc],
-        new_save_dir,
-        rename=sdk_name + ".zip")
-    os.remove(os.path.join(new_save_dir, sdk_name + ".zip"))
-    print("Done. All the files of SDK have been extracted in {}.".format(
-        new_save_dir))
-
-
-if __name__ == "__main__":
-    main()
-
-if is_installed('cpu_fastdeploy'):
-    from cpu_fastdeploy import *
-elif is_installed('gpu_fastdeploy'):
-    from gpu_fastdeploy import *
-else:
-    print(
-        "[INFO] Didn't install cpu/gpu sdk in your environment now, refer https://github.com/PaddlePaddle/FastDeploy/blob/release/0.2.0/docs/server_install.md for more details."
-    )
diff --git a/sdk_mannager/fastdeploy/__main__.py b/sdk_mannager/fastdeploy/__main__.py
deleted file mode 100644
index 6abf5cf8f..000000000
--- a/sdk_mannager/fastdeploy/__main__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import fastdeploy
-
-if __name__ == "__main__":
-    fastdeploy.main()
diff --git a/sdk_mannager/fastdeploy/download.py b/sdk_mannager/fastdeploy/download.py
deleted file mode 100644
index 3b5e92bf3..000000000
--- a/sdk_mannager/fastdeploy/download.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import os.path as osp
-import shutil
-import requests
-import time
-import zipfile
-import hashlib
-import tqdm
-import logging
-
-DOWNLOAD_RETRY_LIMIT = 3
-
-
-def md5check(fullname, md5sum=None):
-    if md5sum is None:
-        return True
-
-    logging.info("File {} md5 checking...".format(fullname))
-    md5 = hashlib.md5()
-    with open(fullname, 'rb') as f:
-        for chunk in iter(lambda: f.read(4096), b""):
-            md5.update(chunk)
-    calc_md5sum = md5.hexdigest()
-
-    if calc_md5sum != md5sum:
-        logging.info("File {} md5 check failed, {}(calc) != "
-                     "{}(base)".format(fullname, calc_md5sum, md5sum))
-        return False
-    return True
-
-
-def move_and_merge_tree(src, dst):
-    """
-    Move src directory to dst, if dst is already exists,
-    merge src to dst
-    """
-    if not osp.exists(dst):
-        shutil.move(src, dst)
-    else:
-        if not osp.isdir(src):
-            shutil.move(src, dst)
-            return
-        for fp in os.listdir(src):
-            src_fp = osp.join(src, fp)
-            dst_fp = osp.join(dst, fp)
-            if osp.isdir(src_fp):
-                if osp.isdir(dst_fp):
-                    move_and_merge_tree(src_fp, dst_fp)
-                else:
-                    shutil.move(src_fp, dst_fp)
-            elif osp.isfile(src_fp) and \
-                    not osp.isfile(dst_fp):
-                shutil.move(src_fp, dst_fp)
-
-
-def download(url, path, rename=None, md5sum=None, show_progress=False):
-    """
-    Download from url, save to path.
-    url (str): download url
-    path (str): download to given path
-    """
-    if not osp.exists(path):
-        os.makedirs(path)
-
-    fname = osp.split(url)[-1]
-    fullname = osp.join(path, fname)
-    if rename is not None:
-        fullname = osp.join(path, rename)
-    retry_cnt = 0
-    while not (osp.exists(fullname) and md5check(fullname, md5sum)):
-        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
-            retry_cnt += 1
-        else:
-            logging.debug("{} download failed.".format(fname))
-            raise RuntimeError("Download from {} failed. "
-                               "Retry limit reached".format(url))
-
-        logging.info("Downloading {} from {}".format(fname, url))
-
-        req = requests.get(url, stream=True)
-        if req.status_code != 200:
-            raise RuntimeError("Downloading from {} failed with code "
-                               "{}!".format(url, req.status_code))
-
-        # For protecting download interupted, download to
-        # tmp_fullname firstly, move tmp_fullname to fullname
-        # after download finished
-        tmp_fullname = fullname + "_tmp"
-        total_size = req.headers.get('content-length')
-        with open(tmp_fullname, 'wb') as f:
-            if total_size and show_progress:
-                for chunk in tqdm.tqdm(
-                        req.iter_content(chunk_size=1024),
-                        total=(int(total_size) + 1023) // 1024,
-                        unit='KB'):
-                    f.write(chunk)
-            else:
-                for chunk in req.iter_content(chunk_size=1024):
-                    if chunk:
-                        f.write(chunk)
-        shutil.move(tmp_fullname, fullname)
-        logging.debug("{} download completed.".format(fname))
-
-    return fullname
-
-
-def decompress(fname):
-    """
-    Decompress for zip and tar file
-    """
-    logging.info("Decompressing {}...".format(fname))
-
-    # For protecting decompressing interupted,
-    # decompress to fpath_tmp directory firstly, if decompress
-    # successed, move decompress files to fpath and delete
-    # fpath_tmp and remove download compress file.
-    fpath = osp.split(fname)[0]
-    fpath_tmp = osp.join(fpath, 'tmp')
-    if osp.isdir(fpath_tmp):
-        shutil.rmtree(fpath_tmp)
-        os.makedirs(fpath_tmp)
-
-    if fname.find('.tar') >= 0 or fname.find('.tgz') >= 0:
-        with tarfile.open(fname) as tf:
-            tf.extractall(path=fpath_tmp)
-    elif fname.find('.zip') >= 0:
-        with zipfile.ZipFile(fname) as zf:
-            zf.extractall(path=fpath_tmp)
-    else:
-        raise TypeError("Unsupport compress file type {}".format(fname))
-
-    for f in os.listdir(fpath_tmp):
-        src_dir = osp.join(fpath_tmp, f)
-        dst_dir = osp.join(fpath, f)
-        move_and_merge_tree(src_dir, dst_dir)
-
-    shutil.rmtree(fpath_tmp)
-    logging.debug("{} decompressed.".format(fname))
-    return dst_dir
-
-
-def url2dir(url, path, rename=None):
-    full_name = download(url, path, rename, show_progress=True)
-    print("SDK is donwloaded, now extracting...")
-    if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0:
-        return decompress(full_name)
-
-
-def download_and_decompress(url, path='.', rename=None):
-    fname = osp.split(url)[-1]
-    fullname = osp.join(path, fname)
-    # if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')):
-    #     fullname = osp.join(path, fname.split('.')[0])
-    nranks = 0
-    if nranks <= 1:
-        dst_dir = url2dir(url, path, rename)
-        if dst_dir is not None:
-            fullname = dst_dir
-    else:
-        lock_path = fullname + '.lock'
-        if not os.path.exists(fullname):
-            with open(lock_path, 'w'):
-                os.utime(lock_path, None)
-            if local_rank == 0:
-                dst_dir = url2dir(url, path, rename)
-                if dst_dir is not None:
-                    fullname = dst_dir
-                os.remove(lock_path)
-            else:
-                while os.path.exists(lock_path):
-                    time.sleep(1)
-    return
diff --git a/sdk_mannager/requirements.txt b/sdk_mannager/requirements.txt
deleted file mode 100644
index 5f64a9a19..000000000
--- a/sdk_mannager/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-tqdm
-six
diff --git a/sdk_mannager/setup.py b/sdk_mannager/setup.py
deleted file mode 100644
index 787071a1b..000000000
--- a/sdk_mannager/setup.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import setuptools
-import fastdeploy
-import io
-import os
-
-with open("requirements.txt") as fin:
-    REQUIRED_PACKAGES = fin.read()
-
-
-def read(*names, **kwargs):
-    with io.open(
-            os.path.join(os.path.dirname(__file__), *names),
-            encoding=kwargs.get("encoding", "utf8")) as fp:
-        return fp.read()
-
-
-setuptools.setup(
-    name="fastdeploy-python",
-    version=fastdeploy.__version__,
-    author="FastDeploy",
-    author_email="fastdeploy@baidu.com",
-    description="FastDeploy is a toolkit to deploy deeplearning models.",
-    long_description=read("../README.md"),
-    long_description_content_type="text/markdown",
-    url="https://github.com/PaddlePaddle/FastDeploy",
-    packages=setuptools.find_packages(),
-    install_requires=REQUIRED_PACKAGES,
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: Apache Software License",
-        "Operating System :: OS Independent",
-    ],
-    license='Apache 2.0',
-    entry_points={
-        'console_scripts': ['fastdeploy=fastdeploy.__init__:main', ]
-    })