add model zoo

This commit is contained in:
jiangjiajun
2022-07-06 03:12:43 +00:00
parent 9d87046d78
commit 4c07d198ba
26 changed files with 560 additions and 639 deletions

View File

@@ -192,12 +192,6 @@ install(
DESTINATION ${CMAKE_INSTALL_PREFIX}
)
# Build demo cpp
if(ENABLE_VISION)
add_executable(yolov5_exe ${PROJECT_SOURCE_DIR}/demo/cpp/vision/yolov5.cc)
target_link_libraries(yolov5_exe PUBLIC fastdeploy)
endif()
if(BUILD_FASTDEPLOY_PYTHON)
add_definitions(-DBUILD_FASTDEPLOY_PYTHON)
if("${PY_EXT_SUFFIX}" STREQUAL "")

View File

@@ -11,6 +11,10 @@ set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/include)
if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 11)
endif()
find_library(FDLIB fastdeploy ${CMAKE_CURRENT_LIST_DIR}/lib)
list(APPEND FASTDEPLOY_LIBS ${FDLIB})
if(ENABLE_ORT_BACKEND)

View File

@@ -1,10 +0,0 @@
import fastdeploy as fd
import cv2
# Download the model and test image
# wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
# wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
im = cv2.imread("bus.jpg")
result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
print(result)

docs/api/runtime_option.md Normal file
View File

@@ -0,0 +1,131 @@
# RuntimeOption: Inference Backend Configuration
The Runtime in FastDeploy integrates multiple inference backends. The supported combinations are shown below
| Model format \ Backend | ONNXRuntime | Paddle Inference | TensorRT | OpenVINO |
| :--------------- | :---------- | :--------------- | :------- | :------- |
| Paddle | Supported (built-in Paddle2ONNX) | Supported | Supported (built-in Paddle2ONNX) | Supported |
| ONNX | Supported | Supported (requires conversion via X2Paddle) | Supported | Supported |
The hardware supported by each backend is as follows
| Hardware \ Backend | ONNXRuntime | Paddle Inference | TensorRT | OpenVINO |
| :--------------- | :---------- | :--------------- | :------- | :------- |
| CPU | Supported | Supported | Not supported | Supported |
| GPU | Supported | Supported | Supported | Supported |
For every model, `RuntimeOption` is used to configure the inference backend and its runtime parameters. For example, after loading a model in Python, the current inference configuration can be printed with the following code
```
model = fastdeploy.vision.ultralytics.YOLOv5("yolov5s.onnx")
print(model.runtime_option)
```
The output looks like this
```
RuntimeOption(
backend : Backend.ORT  # inference backend, ONNXRuntime
cpu_thread_num : 8  # number of CPU threads, effective only for CPU inference
device : Device.CPU  # inference device, CPU
device_id : 0  # device id, used for GPU
model_file : yolov5s.onnx  # path to the model file
params_file :  # path to the parameters file
model_format : Frontend.ONNX  # model format
ort_execution_mode : -1  # parameters prefixed with ort_ are specific to the ONNXRuntime backend
ort_graph_opt_level : -1
ort_inter_op_num_threads : -1
trt_enable_fp16 : False  # parameters prefixed with trt_ are specific to the TensorRT backend
trt_enable_int8 : False
trt_max_workspace_size : 1073741824
trt_serialize_file :
trt_fixed_shape : {}
trt_min_shape : {}
trt_opt_shape : {}
trt_max_shape : {}
trt_max_batch_size : 32
)
```
## Python Usage
### RuntimeOption Class
`fastdeploy.RuntimeOption()` configuration options
#### Options
> * **backend**(fd.Backend): `fd.Backend.ORT`/`fd.Backend.TRT`/`fd.Backend.PDINFER`/`fd.Backend.OPENVINO`, etc.
> * **cpu_thread_num**(int): number of CPU inference threads, effective only for CPU inference
> * **device**(fd.Device): `fd.Device.CPU`/`fd.Device.GPU`, etc.
> * **device_id**(int): device id, used for GPU
> * **model_file**(str): path to the model file
> * **params_file**(str): path to the parameters file
> * **model_format**(Frontend): model format, `fd.Frontend.PADDLE`/`fd.Frontend.ONNX`
> * **ort_execution_mode**(int): ONNXRuntime execution mode; 0 runs operators sequentially, 1 runs operators in parallel; the default -1 uses the ONNXRuntime default
> * **ort_graph_opt_level**(int): ONNXRuntime graph optimization level; 0 disables optimization, 1 basic, 2 extended, 99 all optimizations; the default -1 uses the ONNXRuntime default
> * **ort_inter_op_num_threads**(int): number of threads for inter-op parallelism when `ort_execution_mode` is 1
> * **trt_enable_fp16**(bool): enable FP16 inference in TensorRT
> * **trt_enable_int8**(bool): enable INT8 inference in TensorRT
> * **trt_max_workspace_size**(int): `max_workspace_size` passed to TensorRT
> * **trt_fixed_shape**(dict[str : list[int]]): if the model has a dynamic shape but the input shape stays fixed at inference time, set the fixed input shape here
> * **trt_min_shape**(dict[str : list[int]]): if the model has a dynamic shape and the input shape varies at inference time, set the minimum input shape here
> * **trt_opt_shape**(dict[str : list[int]]): if the model has a dynamic shape and the input shape varies at inference time, set the optimal input shape here
> * **trt_max_shape**(dict[str : list[int]]): if the model has a dynamic shape and the input shape varies at inference time, set the maximum input shape here
> * **trt_max_batch_size**(int): maximum batch size for TensorRT inference
```
import fastdeploy as fd
option = fd.RuntimeOption()
option.backend = fd.Backend.TRT
# When the TRT backend is used with dynamic input shapes,
# the input shape information must be configured
option.trt_min_shape = {"images": [1, 3, 224, 224]}
option.trt_opt_shape = {"images": [4, 3, 224, 224]}
option.trt_max_shape = {"images": [8, 3, 224, 224]}
model = fd.vision.ppcls.Model("resnet50/inference.pdmodel",
"resnet50/inference.pdiparams",
"resnet50/inference_cls.yaml",
runtime_option=option)
```
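As a complementary sketch, the same options can pin inference to the CPU with the ONNXRuntime backend; the thread count and model file below are illustrative assumptions rather than recommendations
```
import fastdeploy as fd
option = fd.RuntimeOption()
option.device = fd.Device.CPU     # run inference on the CPU
option.backend = fd.Backend.ORT   # use the ONNXRuntime backend
option.cpu_thread_num = 8         # CPU inference threads, effective on CPU only
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx",
                                     runtime_option=option)
print(model.runtime_option)
```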
## C++ Usage
### RuntimeOption Struct
`fastdeploy::RuntimeOption()` configuration options
#### Options
> * **backend**(fastdeploy::Backend): `Backend::ORT`/`Backend::TRT`/`Backend::PDINFER`/`Backend::OPENVINO`, etc.
> * **cpu_thread_num**(int): number of CPU inference threads, effective only for CPU inference
> * **device**(fastdeploy::Device): `Device::CPU`/`Device::GPU`, etc.
> * **device_id**(int): device id, used for GPU
> * **model_file**(string): path to the model file
> * **params_file**(string): path to the parameters file
> * **model_format**(fastdeploy::Frontend): model format, `Frontend::PADDLE`/`Frontend::ONNX`
> * **ort_execution_mode**(int): ONNXRuntime execution mode; 0 runs operators sequentially, 1 runs operators in parallel; the default -1 uses the ONNXRuntime default
> * **ort_graph_opt_level**(int): ONNXRuntime graph optimization level; 0 disables optimization, 1 basic, 2 extended, 99 all optimizations; the default -1 uses the ONNXRuntime default
> * **ort_inter_op_num_threads**(int): number of threads for inter-op parallelism when `ort_execution_mode` is 1
> * **trt_enable_fp16**(bool): enable FP16 inference in TensorRT
> * **trt_enable_int8**(bool): enable INT8 inference in TensorRT
> * **trt_max_workspace_size**(int): `max_workspace_size` passed to TensorRT
> * **trt_fixed_shape**(map<string, vector<int>>): if the model has a dynamic shape but the input shape stays fixed at inference time, set the fixed input shape here
> * **trt_min_shape**(map<string, vector<int>>): if the model has a dynamic shape and the input shape varies at inference time, set the minimum input shape here
> * **trt_opt_shape**(map<string, vector<int>>): if the model has a dynamic shape and the input shape varies at inference time, set the optimal input shape here
> * **trt_max_shape**(map<string, vector<int>>): if the model has a dynamic shape and the input shape varies at inference time, set the maximum input shape here
> * **trt_max_batch_size**(int): maximum batch size for TensorRT inference
```
#include "fastdeploy/vision.h"
int main() {
auto option = fastdeploy::RuntimeOption();
option.trt_min_shape["images"] = {1, 3, 224, 224};
option.trt_opt_shape["images"] = {4, 3, 224, 224};
option.trt_max_shape["images"] = {8, 3, 224, 224};
auto model = fastdeploy::vision::ppcls::Model(
"resnet50/inference.pdmodel",
"resnet50/inference.pdiparams",
"resnet50/inference_cls.yaml",
option);
return 0;
}
```

View File

@@ -1,110 +0,0 @@
# C++ Deployment
## Prepare the Inference Library
Build the library by following the [FastDeploy build guide](../compile/README.md), or use one of the prebuilt libraries below
| Library | Platform | Devices | Notes |
|:------ | :---- | :------- | :----- |
|[fastdeploy-linux-x64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz) | Linux | CPU | Integrates ONNXRuntime |
|[fastdeploy-linux-x64-gpu-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-gpu-0.0.3.tgz) | Linux | CPU/GPU | Integrates ONNXRuntime, TensorRT |
|[fastdeploy-osx-x86_64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-x86_64-0.0.3.tgz) | Mac OSX Intel CPU | CPU | Integrates ONNXRuntime |
|[fastdeploy-osx-arm64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-arm64-0.0.3.tgz) | Mac OSX M1 CPU | CPU | Integrates ONNXRuntime |
## Usage
FastDeploy provides models from multiple domains for quick deployment. This document takes deploying YOLOv5 on Linux as an example.
```
# Download the library and extract it
wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
tar xvf fastdeploy-linux-x64-0.0.3.tgz
# Download the model and test image
wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
```
### YOLOv5 Inference Code
Prepare the following `yolov5.cc`
```
#include "fastdeploy/vision.h"
int main() {
namespace vis = fastdeploy::vision;
auto model = vis::ultralytics::YOLOv5("yolov5s.onnx"); // load the model
if (!model.Initialized()) { // check whether the model initialized successfully
std::cerr << "Initialize failed." << std::endl;
return -1;
}
cv::Mat im = cv::imread("bus.jpg"); // read the image
vis::DetectionResult res;
if (!model.Predict(&im, &res)) { // run prediction on the image
std::cerr << "Prediction failed." << std::endl;
return -1;
}
std::cout << res.Str() << std::endl; // print the detection result
return 0;
}
```
### Build the Code
Before building, create a `CMakeLists.txt` file in the same directory as `yolov5.cc` with the following content
```
PROJECT(yolov5_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
# For environments with an older C++ ABI, uncomment the line below for a compatible build
# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
# Set the path of the downloaded and extracted FastDeploy library below
set(FASTDEPLOY_INSTALL_DIR /ssd1/download/fastdeploy-linux-x64-0.0.3/)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Add the FastDeploy header directories
include_directories(${FASTDEPLOY_INCS})
add_executable(yolov5_demo ${PROJECT_SOURCE_DIR}/yolov5.cc)
message(${FASTDEPLOY_LIBS})
# Link the FastDeploy libraries
target_link_libraries(yolov5_demo ${FASTDEPLOY_LIBS})
```
The directory layout should now look like this
```
- demo_directory
|___fastdeploy-linux-x64-0.0.3/ # extracted inference library
|___yolov5.cc # example code
|___CMakeLists.txt # cmake file
|___yolov5s.onnx # model file
|___bus.jpg # test image
```
Run the following commands to build
```
cmake .
make -j
```
The build produces the executable `yolov5_demo` in the current directory; run it with
```
./yolov5_demo
```
This loads the model, runs inference, and prints results like the following
```
DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
223.395126,403.948669, 345.337189, 867.339050, 0.856906, 0
668.301758,400.781372, 808.441772, 882.534973, 0.829716, 0
50.210758,398.571289, 243.123383, 905.016846, 0.805375, 0
23.768217,214.979355, 802.627869, 778.840820, 0.756311, 5
0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
```

View File

@@ -1,24 +0,0 @@
# FastDeploy
FastDeploy is organized into a `Runtime` module and application modules.
## Runtime
`Runtime` maps to different backends on different hardware. In most cases one kind of hardware maps to one backend, but CPU and GPU have multiple backends, and users can choose according to their needs.
| Runtime | Backend |
| :------ | :---- |
| CPU(x86_64) | `fastdeploy::Backend::ORT` |
| GPU(Nvidia) | `fastdeploy::Backend::ORT` / `fastdeploy::Backend::TRT` |
See the [Runtime documentation](runtime.md) for details
## Applications
Applications provide higher-level model inference built on top of `Runtime`, integrating end-to-end inference for models
- Vision
- Text
- Audio
See the [Vision documentation](vision.md) for details

View File

@@ -1,63 +0,0 @@
# Model Development
Taking `ultralytics/yolov5` as an example, add an `ultralytics` directory under `fastdeploy/vision` and create the code file `yolov5.h`
Define the `YOLOv5` class
```
class YOLOv5 : public FastDeployModel {
 public:
  // The constructor takes the model path; the format defaults to ONNX
  YOLOv5(const std::string& model_file)
    : FastDeployModel(model_file, "", Frontend::ONNX) {
    size = {640, 640}; // resize target for image preprocessing
    // padding value for the image
    padding_value = {114.0, 114.0, 114.0};
    // whether to pad only to the smallest box that satisfies the stride
    bool is_mini_pad = false;
    // whether resizing beyond the original image size is allowed
    bool is_scale_up = true;
    // stride; pad width/height to multiples of stride
    stride = 32;
    // The two members below declare which backends the model supports on CPU/GPU
    // Once the Device is set, the first backend in the list is preferred by default
    valid_cpu_backends = {Backend::ORT};
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
  }
  std::string ModelName() const; // returns the model name
  // Model initialization; this function must call the base class `InitBackend()`
  // to initialize the runtime
  // Initialization of pre/postprocessing can also happen here, e.g. ppdet/ppcls build a
  // data preprocessing pipeline
  bool Init();
  // Preprocessing; the input is a vision::Mat and the output is an FDTensor
  // that is handed to the runtime for inference
  bool Preprocess(Mat* mat, FDTensor* output);
  // Postprocessing; the input is the FDTensor produced by the runtime,
  // plus some model-specific postprocessing parameters
  bool Postprocess(FDTensor& tensor, DetectionResult* res, float conf_thresh, float nms_iou_thresh);
  // End-to-end prediction, including pre/postprocessing;
  // parts of the postprocessing parameters are therefore usually exposed here
  bool Predict(cv::Mat* im, DetectionResult* result, float conf_thresh = 0.25, float nms_iou_thresh = 0.5);
};
```
There are no strict conventions on how a model is implemented, but:
- 1. It must inherit from `FastDeployModel`
- 2. Set the available `valid_cpu_backends` and `valid_gpu_backends`
- 3. Implement the `Init()`/`ModelName()`/`Predict()` interfaces
- 4. It is recommended to keep `Preprocess` and `Postprocess` as the two unified pre/postprocessing interfaces
## Miscellaneous
`vision` provides several basic result structures, including `vision::ClassifyResult`, `vision::DetectionResult`, and `vision::SegmentationResult`, as the common output structures of models. New output types that do not fit these categories are inevitable; for data structures that must be customized, handle them as follows
- 1. If the structure is shared by many models, implement it in `vision/common.h` as a common output structure
- 2. If only a single model needs it, implement it next to the model, e.g. in `vision/ultralytics/yolov5.h`, and write the pybind wrapper for the structure yourself

View File

@@ -1,135 +0,0 @@
# fastdeploy::Runtime
## FDTensor: the input/output data structure of Runtime
```
struct FDTensor {
  std::vector<int64_t> shape;   // shape
  std::string name;             // name
  FDDataType dtype;             // data type
  Device device = Device::CPU;  // device holding the data
  void* MutableData();  // get the pointer to the tensor's memory buffer
  // Get the tensor data; if the data resides on another device,
  // it is first copied to the CPU and a pointer to the
  // CPU memory buffer is returned
  void* Data();
  // Initialize the tensor reusing an external data pointer;
  // the memory buffer is created and released by the external caller
  void SetExternalData(const std::vector<int>& new_shape,
                       const FDDataType& data_type,
                       void* data_buffer,
                       const Device& dev);
  int Nbytes() const;  // size of the tensor data in bytes
  int Numel() const;   // number of elements in the tensor
  // Debug helper; prints tensor info including mean, max, min, etc.
  void PrintInfo(const std::string& prefix = "TensorInfo");
};
```
FDTensor is the data structure that connects pre/postprocessing with `Runtime`. In most cases it is recommended to share user-provided data via `SetExternalData` to reduce the overhead of memory copies.
## Runtime: multi-backend inference engine
### RuntimeOption: engine configuration
```
struct RuntimeOption {
  // model file and parameters file
  std::string model_file;
  std::string params_file;
  // model format; currently Frontend::PADDLE / Frontend::ONNX are supported
  Frontend model_format = Frontend::PADDLE;
  Backend backend = Backend::ORT;
  // number of threads when running on CPU
  int cpu_thread_num = 8;
  // inference device; currently Device::CPU / Device::GPU are supported,
  // and must be paired with a compatible backend
  Device device;
  // parameters for Backend::ORT
  int ort_graph_opt_level;
  int ort_inter_op_num_threads;
  int ort_execution_mode;
  // parameters for Backend::TRT
  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
  std::map<std::string, std::vector<int32_t>> trt_max_shape;
  std::map<std::string, std::vector<int32_t>> trt_min_shape;
  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
  std::string trt_serialize_file = "";
  bool trt_enable_fp16 = false;
  bool trt_enable_int8 = false;
  size_t trt_max_batch_size = 32;
};
```
### Runtime: the engine
```
struct Runtime {
  // load the model and initialize the engine
  bool Init(const RuntimeOption& _option);
  // run inference;
  // the name field of each input tensor must be set correctly
  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
  int NumInputs();  // number of inputs
  int NumOutputs(); // number of outputs
  TensorInfo GetInputInfo(int index);  // get input info: shape, dtype, name
  TensorInfo GetOutputInfo(int index); // get output info: shape, dtype, name
  RuntimeOption option; // configuration of the engine
};
```
## Runtime usage examples
### C++
```
#include "fastdeploy/fastdeploy_runtime.h"
int main() {
auto option = fastdeploy::RuntimeOption();
option.model_file = "resnet50/inference.pdmodel";
option.params_file = "resnet50/inference.pdiparams";
auto runtime = fastdeploy::Runtime();
assert(runtime.Init(option));
// prepare the input tensors
std::vector<FDTensor> inputs;
std::vector<FDTensor> outputs;
assert(runtime.Infer(inputs, &outputs));
// inspect debug info of the output tensor
outputs[0].PrintInfo();
}
```
### Python
```
import fastdeploy as fd
import numpy as np
option = fd.RuntimeOption()
option.model_file = "resnet50/inference.pdmodel"
option.params_file = "resnet50/inference.pdiparams"
runtime = fd.Runtime(option)
result = runtime.infer({"image": np.random.rand(1, 3, 224, 224)})
```

View File

@@ -1,74 +0,0 @@
# Vision
Vision is the vision model module of FastDeploy. It contains two common modules, `processors` and `utils`, plus the model modules.
## processors: image processing module
`processors` provides common image processing operations and implements each of them on different backends (currently CPU and GPU). During model preprocessing, developers call the `processors` APIs and can quickly switch between processing backends.
Processing runs on the CPU by default
```
namespace vis = fastdeploy::vision;
im = cv2.imread("test.jpg");
vis::Mat mat(im);
assert(vis::Resize::Run(&mat, 224, 224));
assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
assert(vis::HWC2CHW::Run(&mat));
```
Switch to CUDA GPU processing
```
namespace vis = fastdeploy::vision;
vis::Processor::default_lib = vis::ProcessorLib::OPENCV_CUDA;
im = cv2.imread("test.jpg");
vis::Mat mat(im);
assert(vis::Resize::Run(&mat, 224, 224));
assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
assert(vis::HWC2CHW::Run(&mat));
```
During processing, `fastdeploy::vision::Mat` is used as the data structure passed between operations
```
struct Mat {
  Mat(cv::Mat);       // construct from a `cv::Mat`
  FDDataType Type();  // data type
  int Channels();     // number of channels
  int Width();        // width
  int Height();       // height
  // Get the image; if the Mat is on the GPU, it is copied to the CPU before returning
  cv::Mat GetCpuMat();
  // Get the image; if the Mat is on the CPU, it is copied to the GPU before returning
  cv::cuda::GpuMat GetGpuMat();
  void ShareWithTensor(FDTensor* tensor); // build an FDTensor that shares this memory
  bool CopyToTensor(FDTensor* tensor);    // build an FDTensor on the CPU and copy the data into it
  Layout layout; // data layout, Layout::HWC / Layout::CHW supported
  Device device; // device holding the data, Device::CPU / Device::GPU supported
};
```
## utilities: utility module
Provides common helper functions, such as the `TopK` selection used by classification models and the `NMS` operation used by detection models. Later on, the postprocessing implementations may also get different backends.
## visualize: visualization module
Provides visualization helpers; detection, segmentation, OCR and similar tasks all need such functions to inspect results visually.
## Model modules
These are the most important part of `Vision`. All models are organized by `domain name` + `model name`, e.g.
- vision::ppdet::YOLOv3 // the YOLOv3 model from PaddleDetection
- vision::ppdet::RCNN // the RCNN-family models from PaddleDetection
- vision::ultralytics::YOLOv5 // the YOLOv5 model from https://github.com/ultralytics/yolov5
See [Model Development](models.md) for how to add a new model
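The same domain + model naming is mirrored in the Python bindings; a rough sketch (the model files below are placeholders):
```
import fastdeploy as fd
# domain "ppcls", model "Model" (PaddleClas classification models)
cls_model = fd.vision.ppcls.Model("resnet50/inference.pdmodel",
                                  "resnet50/inference.pdiparams",
                                  "resnet50/inference_cls.yaml")
# domain "ultralytics", model "YOLOv5"
det_model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
```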

View File

@@ -1,57 +0,0 @@
# FastDeploy Models
Currently supported models:
- [fastdeploy.vision.ppcls.Model](vision/ppcls.md): all classification models in PaddleClas
- [fastdeploy.vision.ultralytics.YOLOv5](vision/ultralytics.md): the [ultralytics/yolov5](https://github.com/ultralytics/yolov5) model
See each model's documentation for its API and usage examples. Every model comes with a default Runtime configuration; this document explains how to change the backend configuration. The following Python code runs the YOLOv5 model:
```
import fastdeploy as fd
model = fd.vision.ulttralytics.YOLOv5("yolov5s.onnx")
import cv2
im = cv2.imread('bus.jpg')
result = model.predict(im)
print(model.runtime_option)
```
`print(model.runtime_option)` shows the following information
```
RuntimeOption(
backend : Backend.ORT # current inference backend, ONNXRuntime
cpu_thread_num : 8 # number of CPU threads, effective only when inferring on CPU
device : Device.GPU # current inference device, GPU
device_id : 0 # current inference device id, 0
model_file : yolov5s.onnx # path to the model file
model_format : Frontend.ONNX # model format, currently ONNX
ort_execution_mode : -1 # ONNXRuntime backend parameter, -1 means default
ort_graph_opt_level : -1 # ONNXRuntime backend parameter, -1 means default
ort_inter_op_num_threads : -1 # ONNXRuntime backend parameter, -1 means default
params_file : # parameters file; ONNX models have none
trt_enable_fp16 : False # TensorRT parameter
trt_enable_int8 : False # TensorRT parameter
trt_fixed_shape : {} # TensorRT parameter
trt_max_batch_size : 32 # TensorRT parameter
trt_max_shape : {} # TensorRT parameter
trt_max_workspace_size : 1073741824 # TensorRT parameter
trt_min_shape : {} # TensorRT parameter
trt_opt_shape : {} # TensorRT parameter
trt_serialize_file : # TensorRT parameter
)
```
Note that parameters whose names start with `ort` are specific to the ONNXRuntime backend, while those starting with `trt` are specific to the TensorRT backend. See [RuntimeOption](runtime_option.md) for how backends and their parameters are configured.
## Switching the inference configuration
In general, users only need to care about which device inference runs on. If there is a further need, a different backend can also be chosen for that device, but make sure the device and backend match: for example, Backend::TRT only supports GPU, while Backend::ORT supports both CPU and GPU.
```
import fastdeploy as fd
option = fd.RuntimeOption()
option.device = fd.Device.CPU
option.cpu_thread_num = 12
model = fd.vision.ulttralytics.YOLOv5("yolov5s.onnx", option)
print(model.runtime_option)
```
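For GPU inference with the TensorRT backend, a similar sketch (illustrative only; it assumes a GPU build of FastDeploy is installed):
```
import fastdeploy as fd
option = fd.RuntimeOption()
option.device = fd.Device.GPU   # Backend.TRT only supports GPU
option.backend = fd.Backend.TRT
option.trt_enable_fp16 = True   # optionally enable FP16 inference
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx", runtime_option=option)
print(model.runtime_option)
```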

View File

@@ -1,104 +0,0 @@
# PaddleClas Classification Model Inference
For exporting PaddleClas models, refer to [PaddleClas](https://github.com/PaddlePaddle/PaddleClas.git)
## Python API
### Model Class
```
fastdeploy.vision.ppcls.Model(model_file, params_file, config_file, runtime_option=None, model_format=fastdeploy.Frontend.PADDLE)
```
**Parameters**
> * **model_file**(str): model file, e.g. resnet50/inference.pdmodel
> * **params_file**(str): parameters file, e.g. resnet50/inference.pdiparams
> * **config_file**(str): configuration file, i.e. the inference config provided by PaddleClas, e.g. [inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
> * **runtime_option**(fd.RuntimeOption): backend inference configuration; the default None uses the default configuration
> * **model_format**(fd.Frontend): model format; PaddleClas models are always Frontend.PADDLE
#### predict
```
Model.predict(image_data, topk=1)
```
> **Parameters**
>
> > * **image_data**(np.ndarray): input data, must be in HWC, RGB format
> > * **topk**(int): return the top-k classes
> **Returns**
>
> > * **result**(ClassifyResult): a struct with two list members, `label_ids` and `scores`, holding the predicted classes and their confidences
### Example
> ```
> import fastdeploy.vision as vis
> import cv2
> model = vis.ppcls.Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml")
> im = cv2.imread("test.jpeg")
> result = model.predict(im, topk=5)
> print(result.label_ids[0], result.scores[0])
> ```
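A variant of the example above that passes a custom `RuntimeOption` is sketched below; the thread count is an arbitrary illustration:
> ```
> import fastdeploy as fd
> import fastdeploy.vision as vis
> import cv2
> option = fd.RuntimeOption()
> option.device = fd.Device.CPU
> option.cpu_thread_num = 12
> model = vis.ppcls.Model("resnet50/inference.pdmodel",
>                         "resnet50/inference.pdiparams",
>                         "resnet50/inference_cls.yaml",
>                         runtime_option=option)
> im = cv2.imread("test.jpeg")
> result = model.predict(im, topk=5)
> print(result.label_ids[0], result.scores[0])
> ```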
## C++ API
The header `#include "fastdeploy/vision.h"` is required
### Model Class
```
fastdeploy::vision::ppcls::Model(
const std::string& model_file,
const std::string& params_file,
const std::string& config_file,
const RuntimeOption& custom_option = RuntimeOption(),
const Frontend& model_format = Frontend::PADDLE)
```
**Parameters**
> * **model_file**: model file, e.g. resnet50/inference.pdmodel
> * **params_file**: parameters file, e.g. resnet50/inference.pdiparams
> * **config_file**: configuration file, i.e. the inference config provided by PaddleClas, e.g. [inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
> * **runtime_option**: backend inference configuration; if not set, the default configuration is used
> * **model_format**: model format; PaddleClas models are always Frontend.PADDLE
#### Predict
```
bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk = 1)
```
> **Parameters**
> > * **im**: input image, must be in HWC, RGB format (note that im is modified during preprocessing)
> > * **result**: classification result
> > * **topk**: return the top-k classification results
> **Returns**
> > true or false, indicating whether the prediction succeeded
### Example
> ```
> #include "fastdeploy/vision.h"
>
> int main() {
> namespace vis = fastdeploy::vision;
> auto model = vis::ppcls::Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml");
>
> if (!model.Initialized()) {
> std::cerr << "Initialize failed." << std::endl;
> return -1;
> }
>
> cv::Mat im = cv::imread("test.jpeg");
>
> vis::ClassifyResult res;
> if (!model.Predict(&im, &res, 5)) {
> std::cerr << "Prediction failed." << std::endl;
> return -1;
> }
>
> std::cout << res.label_ids[0] << " " << res.scores[0] << std::endl;
> return 0;
> }
> ```

View File

@@ -17,6 +17,7 @@ from .fastdeploy_main import Frontend, Backend, FDDataType, TensorInfo, RuntimeO
from .fastdeploy_runtime import *
from . import fastdeploy_main as C
from . import vision
from .download import download
def TensorInfoStr(tensor_info):

View File

@@ -22,11 +22,11 @@
namespace fastdeploy {
enum class Device { DEFAULT, CPU, GPU };
enum FASTDEPLOY_DECL Device { DEFAULT, CPU, GPU };
FASTDEPLOY_DECL std::string Str(Device& d);
enum class FDDataType {
enum FASTDEPLOY_DECL FDDataType {
BOOL,
INT16,
INT32,
@@ -56,4 +56,4 @@ FASTDEPLOY_DECL std::string Str(FDDataType& fdt);
FASTDEPLOY_DECL int32_t FDDataTypeSize(FDDataType data_dtype);
FASTDEPLOY_DECL std::string FDDataTypeStr(FDDataType data_dtype);
} // namespace fastdeploy
} // namespace fastdeploy

fastdeploy/download.py Normal file
View File

@@ -0,0 +1,186 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import shutil
import requests
import time
import zipfile
import tarfile
import hashlib
import tqdm
import logging
DOWNLOAD_RETRY_LIMIT = 3
def md5check(fullname, md5sum=None):
if md5sum is None:
return True
logging.info("File {} md5 checking...".format(fullname))
md5 = hashlib.md5()
with open(fullname, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b""):
md5.update(chunk)
calc_md5sum = md5.hexdigest()
if calc_md5sum != md5sum:
logging.info("File {} md5 check failed, {}(calc) != "
"{}(base)".format(fullname, calc_md5sum, md5sum))
return False
return True
def move_and_merge_tree(src, dst):
"""
Move the src directory to dst; if dst already exists,
merge src into dst
"""
if not osp.exists(dst):
shutil.move(src, dst)
else:
if not osp.isdir(src):
shutil.move(src, dst)
return
for fp in os.listdir(src):
src_fp = osp.join(src, fp)
dst_fp = osp.join(dst, fp)
if osp.isdir(src_fp):
if osp.isdir(dst_fp):
move_and_merge_tree(src_fp, dst_fp)
else:
shutil.move(src_fp, dst_fp)
elif osp.isfile(src_fp) and \
not osp.isfile(dst_fp):
shutil.move(src_fp, dst_fp)
def download(url, path, rename=None, md5sum=None, show_progress=False):
"""
Download from url, save to path.
url (str): download url
path (str): download to given path
"""
if not osp.exists(path):
os.makedirs(path)
fname = osp.split(url)[-1]
fullname = osp.join(path, fname)
if rename is not None:
fullname = osp.join(path, rename)
retry_cnt = 0
while not (osp.exists(fullname) and md5check(fullname, md5sum)):
if retry_cnt < DOWNLOAD_RETRY_LIMIT:
retry_cnt += 1
else:
logging.debug("{} download failed.".format(fname))
raise RuntimeError("Download from {} failed. "
"Retry limit reached".format(url))
logging.info("Downloading {} from {}".format(fname, url))
req = requests.get(url, stream=True)
if req.status_code != 200:
raise RuntimeError("Downloading from {} failed with code "
"{}!".format(url, req.status_code))
# To guard against interrupted downloads, download to
# tmp_fullname first, then move tmp_fullname to fullname
# after the download finishes
tmp_fullname = fullname + "_tmp"
total_size = req.headers.get('content-length')
with open(tmp_fullname, 'wb') as f:
if total_size and show_progress:
for chunk in tqdm.tqdm(
req.iter_content(chunk_size=1024),
total=(int(total_size) + 1023) // 1024,
unit='KB'):
f.write(chunk)
else:
for chunk in req.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
shutil.move(tmp_fullname, fullname)
logging.debug("{} download completed.".format(fname))
return fullname
def decompress(fname):
"""
Decompress zip and tar files
"""
logging.info("Decompressing {}...".format(fname))
# To guard against interrupted decompression,
# decompress into the fpath_tmp directory first; if decompression
# succeeds, move the decompressed files to fpath, delete fpath_tmp,
# and remove the downloaded compressed file.
fpath = osp.split(fname)[0]
fpath_tmp = osp.join(fpath, 'tmp')
if osp.isdir(fpath_tmp):
shutil.rmtree(fpath_tmp)
os.makedirs(fpath_tmp)
if fname.find('.tar') >= 0 or fname.find('.tgz') >= 0:
with tarfile.open(fname) as tf:
tf.extractall(path=fpath_tmp)
elif fname.find('.zip') >= 0:
with zipfile.ZipFile(fname) as zf:
zf.extractall(path=fpath_tmp)
else:
raise TypeError("Unsupport compress file type {}".format(fname))
for f in os.listdir(fpath_tmp):
src_dir = osp.join(fpath_tmp, f)
dst_dir = osp.join(fpath, f)
move_and_merge_tree(src_dir, dst_dir)
shutil.rmtree(fpath_tmp)
logging.debug("{} decompressed.".format(fname))
return dst_dir
def url2dir(url, path, rename=None):
full_name = download(url, path, rename, show_progress=True)
print("SDK is donwloaded, now extracting...")
if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0:
return decompress(full_name)
def download_and_decompress(url, path='.', rename=None):
fname = osp.split(url)[-1]
fullname = osp.join(path, fname)
# if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')):
# fullname = osp.join(path, fname.split('.')[0])
nranks = 0
if nranks <= 1:
dst_dir = url2dir(url, path, rename)
if dst_dir is not None:
fullname = dst_dir
else:
lock_path = fullname + '.lock'
if not os.path.exists(fullname):
with open(lock_path, 'w'):
os.utime(lock_path, None)
if local_rank == 0:
dst_dir = url2dir(url, path, rename)
if dst_dir is not None:
fullname = dst_dir
os.remove(lock_path)
else:
while os.path.exists(lock_path):
time.sleep(1)
return
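A brief usage sketch for this module (illustrative; the URLs are the ones used elsewhere in this commit):
```
# Illustrative usage of fastdeploy/download.py
from fastdeploy.download import download, download_and_decompress

# download a single file into the current directory with a progress bar
download(
    "https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx",
    ".", show_progress=True)

# download an archive and extract it next to the download location
download_and_decompress(
    "https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz",
    path=".")
```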

View File

@@ -13,15 +13,15 @@
// limitations under the License.
#pragma once
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
#include <map>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
enum class Backend { UNKNOWN, ORT, TRT, PDRT };
enum class Frontend { PADDLE, ONNX };
enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER };
enum FASTDEPLOY_DECL Frontend { PADDLE, ONNX };
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
@@ -63,9 +63,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
size_t trt_max_batch_size = 32;
size_t trt_max_workspace_size = 1 << 30;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Frontend model_format = Frontend::PADDLE; // format of input model
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Frontend model_format = Frontend::PADDLE; // format of input model
};
struct FASTDEPLOY_DECL Runtime {
@@ -91,4 +91,4 @@ struct FASTDEPLOY_DECL Runtime {
private:
BaseBackend* backend_;
};
} // namespace fastdeploy
} // namespace fastdeploy

View File

@@ -82,7 +82,7 @@ void BindRuntime(pybind11::module& m) {
.value("UNKOWN", Backend::UNKNOWN)
.value("ORT", Backend::ORT)
.value("TRT", Backend::TRT)
.value("PDRT", Backend::PDRT);
.value("PDINFER", Backend::PDINFER);
pybind11::enum_<Frontend>(m, "Frontend", pybind11::arithmetic(),
"Frontend for inference.")
.value("PADDLE", Frontend::PADDLE)
@@ -111,4 +111,4 @@ void BindRuntime(pybind11::module& m) {
m.def("get_available_backends", []() { return GetAvailableBackends(); });
}
} // namespace fastdeploy
} // namespace fastdeploy

View File

@@ -4,4 +4,4 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
version = '0.3.0'
git_version = 'e388f48a0d6b97fc09dd7bd64c4aaa0f5687690c'
git_version = '9d87046d78bcfc10c07b1e7a2bccda3914452201'

View File

@@ -17,10 +17,15 @@
namespace fastdeploy {
namespace vision {
enum ResultType { UNKNOWN, CLASSIFY, DETECTION, SEGMENTATION };
enum FASTDEPLOY_DECL ResultType {
UNKNOWN_RESULT,
CLASSIFY,
DETECTION,
SEGMENTATION
};
struct FASTDEPLOY_DECL BaseResult {
ResultType type = ResultType::UNKNOWN;
ResultType type = ResultType::UNKNOWN_RESULT;
};
struct FASTDEPLOY_DECL ClassifyResult : public BaseResult {
@@ -53,5 +58,5 @@ struct FASTDEPLOY_DECL DetectionResult : public BaseResult {
std::string Str();
};
} // namespace vision
} // namespace fastdeploy
} // namespace vision
} // namespace fastdeploy

View File

@@ -11,8 +11,8 @@ Model::Model(const std::string& model_file, const std::string& params_file,
const std::string& config_file, const RuntimeOption& custom_option,
const Frontend& model_format) {
config_file_ = config_file;
valid_cpu_backends = {Backend::ORT, Backend::PDRT};
valid_gpu_backends = {Backend::ORT, Backend::PDRT};
valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER};
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
@@ -135,6 +135,6 @@ bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
return true;
}
} // namespace ppcls
} // namespace vision
} // namespace fastdeploy
} // namespace ppcls
} // namespace vision
} // namespace fastdeploy

View File

@@ -44,10 +44,10 @@ void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
YOLOv5::YOLOv5(const std::string& model_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
valid_cpu_backends = {Backend::ORT}; // specify the available CPU backends
valid_gpu_backends = {Backend::ORT}; // specify the available GPU backends
valid_cpu_backends = {Backend::ORT};  // specify the available CPU backends
valid_gpu_backends = {Backend::ORT};  // specify the available GPU backends
runtime_option = custom_option;
runtime_option.model_format = model_format; // specify the model format
runtime_option.model_format = model_format;  // specify the model format
runtime_option.model_file = model_file;
// initialized marks whether the model was initialized successfully
// In C++ or Python, call YOLOv5.Initialized() /
@@ -58,8 +58,8 @@ YOLOv5::YOLOv5(const std::string& model_file,
YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
valid_cpu_backends = {Backend::PDRT}; // specify the available CPU backends
valid_gpu_backends = {Backend::PDRT}; // specify the available GPU backends
valid_cpu_backends = {Backend::PDINFER}; // specify the available CPU backends
valid_gpu_backends = {Backend::PDINFER}; // specify the available GPU backends
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
@@ -102,7 +102,7 @@ bool YOLOv5::Preprocess(Mat* mat, FDTensor* output,
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
return true;
}
@@ -166,7 +166,6 @@ bool YOLOv5::Postprocess(
bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
float nms_iou_threshold) {
#ifdef FASTDEPLOY_DEBUG
TIMERECORD_START(0)
#endif
@@ -217,6 +216,6 @@ bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
return true;
}
} // namespace ultralytics
} // namespace vision
} // namespace fastdeploy
} // namespace ultralytics
} // namespace vision
} // namespace fastdeploy

View File

@@ -0,0 +1,45 @@
# YOLOv5 Deployment Example
This document describes how to quickly deploy and run inference with [YOLOv5](https://github.com/ultralytics/yolov5). The directory layout is as follows
```
.
├── cpp # C++ code directory
│   ├── CMakeLists.txt # CMakeLists for building the C++ code
│   ├── README.md # C++ build and deployment guide
│   └── yolov5.cc # C++ example code
├── README.md # YOLOv5 deployment guide
└── yolov5.py # Python example code
```
## Install FastDeploy
Install FastDeploy with the following commands. Note that `vision-cpu` is installed here; `vision-gpu` can be installed instead if needed
```
# Install the fastdeploy-python tool
pip install fastdeploy-python
# Install the vision-cpu module
fastdeploy install vision-cpu
```
## Python Deployment
Running the following command automatically downloads the YOLOv5 model and test image
```
python yolov5.py
```
After it finishes, the visualized result is saved locally as `vis_result.jpg`, and the detection result is printed as follows
```
DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
223.395142,403.948669, 345.337189, 867.339050, 0.856906, 0
668.301758,400.781342, 808.441772, 882.534973, 0.829716, 0
50.210720,398.571411, 243.123367, 905.016602, 0.805375, 0
23.768242,214.979370, 802.627686, 778.840881, 0.756311, 5
0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
```
## Other Documents
- [C++ Deployment](./cpp/README.md)
- [YOLOv5 API Reference](./api.md)

View File

@@ -0,0 +1,71 @@
# YOLOv5 API Reference
## Python API
### YOLOv5 Class
```
fastdeploy.vision.ultralytics.YOLOv5(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
```
Loads and initializes a YOLOv5 model. When model_format is `fd.Frontend.ONNX`, only model_file is required, e.g. `yolov5s.onnx`; when model_format is `fd.Frontend.PADDLE`, both model_file and params_file must be provided.
**Parameters**
> * **model_file**(str): path to the model file
> * **params_file**(str): path to the parameters file
> * **runtime_option**(RuntimeOption): backend inference configuration; the default None uses the default configuration
> * **model_format**(Frontend): model format
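For example, loading an ONNX versus a Paddle-format model looks roughly like this (the Paddle file names below are hypothetical placeholders):
```
import fastdeploy as fd
# ONNX format: only the model file is needed
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
# Paddle format: both model and parameters files are required (placeholder paths)
model = fd.vision.ultralytics.YOLOv5("yolov5s_paddle/model.pdmodel",
                                     "yolov5s_paddle/model.pdiparams",
                                     model_format=fd.Frontend.PADDLE)
```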
#### predict
> ```
> YOLOv5.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
> ```
> Model prediction interface; takes an image and returns the detection result directly.
>
> **Parameters**
>
> > * **image_data**(np.ndarray): input data, must be in HWC, RGB format
> > * **conf_threshold**(float): confidence threshold for filtering detection boxes
> > * **nms_iou_threshold**(float): IoU threshold used during NMS
See [yolov5.py](./yolov5.py) for example code
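For quick reference, a condensed sketch of that script:
```
import fastdeploy as fd
import cv2
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
im = cv2.imread("bus.jpg")
# both thresholds are optional; the values below are the defaults
result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
print(result)
```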
## C++ API
### YOLOv5 Class
```
fastdeploy::vision::ultralytics::YOLOv5(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const Frontend& model_format = Frontend::ONNX)
```
Loads and initializes a YOLOv5 model. When model_format is `Frontend::ONNX`, only model_file is required, e.g. `yolov5s.onnx`; when model_format is `Frontend::PADDLE`, both model_file and params_file must be provided.
**Parameters**
> * **model_file**(str): path to the model file
> * **params_file**(str): path to the parameters file
> * **runtime_option**(RuntimeOption): backend inference configuration; if not set, the default configuration is used
> * **model_format**(Frontend): model format
#### Predict
> ```
> YOLOv5::Predict(cv::Mat* im, DetectionResult* result,
> float conf_threshold = 0.25,
> float nms_iou_threshold = 0.5)
> ```
> Model prediction interface; takes an image and returns the detection result directly.
>
> **Parameters**
>
> > * **im**: input image, must be in HWC, RGB format
> > * **result**: detection result, including the detection boxes and the confidence of each box
> > * **conf_threshold**: confidence threshold for filtering detection boxes
> > * **nms_iou_threshold**: IoU threshold used during NMS
See [cpp/yolov5.cc](cpp/yolov5.cc) for example code
## Other APIs
- [RuntimeOption configuration for model deployment](../../../docs/api/runtime_option.md)

View File

@@ -0,0 +1,18 @@
PROJECT(yolov5_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
# For environments with an older C++ ABI, uncomment the line below for a compatible build
# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
# Set the path of the downloaded and extracted FastDeploy library below
set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Add the FastDeploy header directories
include_directories(${FASTDEPLOY_INCS})
add_executable(yolov5_demo ${PROJECT_SOURCE_DIR}/yolov5.cc)
message(${FASTDEPLOY_LIBS})
# Link the FastDeploy libraries
target_link_libraries(yolov5_demo ${FASTDEPLOY_LIBS})

View File

@@ -0,0 +1,30 @@
# Build the YOLOv5 Example
```
# Download and extract the inference library
wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
tar xvf fastdeploy-linux-x64-0.0.3.tgz
# Build the example code
mkdir build && cd build
cmake ..
make -j
# Download the model and test image
wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
# Run
./yolov5_demo
```
After running, the visualized result is saved locally as `vis_result.jpg`, and the detection boxes are printed to the terminal as shown below
```
DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
223.395142,403.948669, 345.337189, 867.339050, 0.856906, 0
668.301758,400.781342, 808.441772, 882.534973, 0.829716, 0
50.210720,398.571411, 243.123367, 905.016602, 0.805375, 0
23.768242,214.979370, 802.627686, 778.840881, 0.756311, 5
0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
```

View File

@@ -15,35 +15,26 @@
#include "fastdeploy/vision.h"
int main() {
auto model = fastdeploy::vision::ultralytics::YOLOv5("yolov5s.onnx");
model.EnableDebug();
namespace vis = fastdeploy::vision;
auto model = vis::ultralytics::YOLOv5("yolov5s.onnx");
if (!model.Initialized()) {
std::cout << "Init Failed." << std::endl;
std::cerr << "Init Failed." << std::endl;
return -1;
}
cv::Mat im = cv::imread("bus.jpg");
cv::Mat vis_im = im.clone();
for (size_t i = 0; i < 10; ++i) {
auto im1 = im.clone();
fastdeploy::vision::DetectionResult res;
if (!model.Predict(&im1, &res)) {
std::cout << "Predict Failed." << std::endl;
return -1;
}
vis::DetectionResult res;
if (!model.Predict(&im, &res)) {
std::cerr << "Prediction Failed." << std::endl;
return -1;
}
{
fastdeploy::vision::DetectionResult res;
auto vis_im = im.clone();
if (!model.Predict(&im, &res)) {
std::cout << "Predict Failed." << std::endl;
return -1;
}
// print the detection boxes
std::cout << res.Str() << std::endl;
fastdeploy::vision::Visualize::VisDetection(&vis_im, res);
cv::imwrite("vis.jpg", vis_im);
// Print Detection Result
std::cout << res.Str() << std::endl;
}
return 0;
// visualize the prediction result
vis::Visualize::VisDetection(&vis_im, res);
cv::imwrite("vis_result.jpg", vis_im);
return 0;
}

View File

@@ -0,0 +1,23 @@
import fastdeploy as fd
import cv2
# Download the model and test image
model_url = "https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx"
test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg"
fd.download(model_url, ".", show_progress=True)
fd.download(test_jpg_url, ".", show_progress=True)
# Load the model
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
# Run prediction on the image
im = cv2.imread("bus.jpg")
result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
# Visualize the result
fd.vision.visualize.vis_detection(im, result)
cv2.imwrite("vis_result.jpg", im)
# Print the prediction results
print(result)
print(model.runtime_option)