Add YOLOv5Face model support (#38)

* update .gitignore * Added checking for cmake include dir * fixed missing trt_backend option bug when init from trt * remove un-need data layout and add pre-check for dtype * changed RGB2BRG to BGR2RGB in ppcls model * add model_zoo yolov6 c++/python demo * fixed CMakeLists.txt typos * update yolov6 cpp/README.md * add yolox c++/pybind and model_zoo demo * move some helpers to private * fixed CMakeLists.txt typos * add normalize with alpha and beta * add version notes for yolov5/yolov6/yolox * add copyright to yolov5.cc * revert normalize * fixed some bugs in yolox * Add YOLOv5Face Model support * fixed examples/vision typos * fixed runtime_option print func bugs
2025-10-06 17:17:14 +08:00 · 2022-07-25 21:55:56 +08:00
parent 36fc77e6b8
commit fc71d79e58
27 changed files with 1240 additions and 16 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@ fastdeploy.egg-info
 fastdeploy/version.py
 fastdeploy/LICENSE*
 fastdeploy/ThirdPartyNotices*
 *.so*
 fastdeploy/libs/third_libs
--- a/examples/vision/deepcam_yolov5face.cc
+++ b/examples/vision/deepcam_yolov5face.cc
@@ -0,0 +1,53 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/vision.h"
 // Example: run the YOLOv5Face detector on one image, print the detections and
 // save a visualisation. Returns 0 on success and -1 on any failure.
 int main() {
  namespace vis = fastdeploy::vision;
  std::string model_file = "../resources/models/yolov5s-face.onnx";
  std::string img_path = "../resources/images/test_face_det.jpg";
  std::string vis_path =
      "../resources/outputs/deepcam_yolov5face_vis_result.jpg";
  auto model = vis::deepcam::YOLOv5Face(model_file);
  if (!model.Initialized()) {
    std::cerr << "Init Failed! Model: " << model_file << std::endl;
    return -1;
  } else {
    std::cout << "Init Done! Model:" << model_file << std::endl;
  }
  model.EnableDebug();
  cv::Mat im = cv::imread(img_path);
  // cv::imread signals failure by returning an empty matrix, so stop here
  // instead of handing an empty image to the model.
  if (im.empty()) {
    std::cerr << "Failed to read image: " << img_path << std::endl;
    return -1;
  }
  cv::Mat vis_im = im.clone();
  vis::FaceDetectionResult res;
  if (!model.Predict(&im, &res, 0.1f, 0.3f)) {
    std::cerr << "Prediction Failed." << std::endl;
    return -1;
  } else {
    std::cout << "Prediction Done!" << std::endl;
  }
  // Print the predicted boxes.
  std::cout << res.Str() << std::endl;
  // Draw the predictions onto the copy of the input image.
  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
  // cv::imwrite returns false when the image could not be written.
  if (!cv::imwrite(vis_path, vis_im)) {
    std::cerr << "Failed to save image: " << vis_path << std::endl;
    return -1;
  }
  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
  return 0;
 }
--- a/fastdeploy/init.py
+++ b/fastdeploy/init.py
@@ -32,6 +32,8 @@ def RuntimeOptionStr(runtime_option):
    for attr in attrs:
        if attr.startswith("__"):
            continue
        if hasattr(getattr(runtime_option, attr), "__call__"):
            continue
        message += "  {} : {}\t\n".format(attr, getattr(runtime_option, attr))
    message.strip("\n")
    message += ")"
--- a/fastdeploy/vision.h
+++ b/fastdeploy/vision.h
@@ -15,16 +15,17 @@
 #include "fastdeploy/core/config.h"
 #ifdef ENABLE_VISION
 #include "fastdeploy/vision/deepcam/yolov5face.h"
 #include "fastdeploy/vision/megvii/yolox.h"
 #include "fastdeploy/vision/meituan/yolov6.h"
 #include "fastdeploy/vision/ppcls/model.h"
 #include "fastdeploy/vision/ppdet/ppyoloe.h"
 #include "fastdeploy/vision/rangilyu/nanodet_plus.h"
 #include "fastdeploy/vision/ppseg/model.h"
 #include "fastdeploy/vision/rangilyu/nanodet_plus.h"
 #include "fastdeploy/vision/ultralytics/yolov5.h"
 #include "fastdeploy/vision/wongkinyiu/scaledyolov4.h"
 #include "fastdeploy/vision/wongkinyiu/yolor.h"
 #include "fastdeploy/vision/wongkinyiu/yolov7.h"
 #include "fastdeploy/vision/wongkinyiu/scaledyolov4.h"
 #endif
 #include "fastdeploy/vision/visualize/visualize.h"
--- a/fastdeploy/vision/init.py
+++ b/fastdeploy/vision/init.py
@@ -22,4 +22,5 @@ from . import meituan
 from . import megvii
 from . import visualize
 from . import wongkinyiu
 from . import deepcam
 from . import rangilyu
--- a/fastdeploy/vision/common/result.cc
+++ b/fastdeploy/vision/common/result.cc
@@ -72,6 +72,73 @@ std::string DetectionResult::Str() {
  return out;
 }
 // Copy constructor: duplicates the boxes, landmarks, scores and the
 // per-face landmark count of `res`.
 FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult& res)
    : boxes(res.boxes),
      landmarks(res.landmarks),
      scores(res.scores),
      landmarks_per_face(res.landmarks_per_face) {}
 // Empties every container and resets landmarks_per_face to 0.
 void FaceDetectionResult::Clear() {
  // Swapping with an empty temporary leaves the member empty; the previous
  // contents move into the temporary and are destroyed with it.
  decltype(boxes)().swap(boxes);
  decltype(scores)().swap(scores);
  decltype(landmarks)().swap(landmarks);
  landmarks_per_face = 0;
 }
 // Reserves capacity for `size` faces. Landmark storage is only reserved when
 // landmarks_per_face is positive, so set that field before calling this.
 void FaceDetectionResult::Reserve(int size) {
  boxes.reserve(size);
  scores.reserve(size);
  if (landmarks_per_face <= 0) {
    return;
  }
  landmarks.reserve(size * landmarks_per_face);
 }
 // Resizes the containers to hold `size` faces. Landmark storage is only
 // resized when landmarks_per_face is positive.
 void FaceDetectionResult::Resize(int size) {
  boxes.resize(size);
  scores.resize(size);
  if (landmarks_per_face <= 0) {
    return;
  }
  landmarks.resize(size * landmarks_per_face);
 }
 std::string FaceDetectionResult::Str() {
  std::string out;
  // format without landmarks
  if (landmarks_per_face <= 0) {
    out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score]\n";
    for (size_t i = 0; i < boxes.size(); ++i) {
      out = out + std::to_string(boxes[i][0]) + "," +
            std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) +
            ", " + std::to_string(boxes[i][3]) + ", " +
            std::to_string(scores[i]) + "\n";
    }
    return out;
  }
  // format with landmarks
  FDASSERT((landmarks.size() == boxes.size() * landmarks_per_face),
           "The size of landmarks != boxes.size * landmarks_per_face.");
  out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x " +
        std::to_string(landmarks_per_face) + "]\n";
  for (size_t i = 0; i < boxes.size(); ++i) {
    out = out + std::to_string(boxes[i][0]) + "," +
          std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) +
          ", " + std::to_string(boxes[i][3]) + ", " +
          std::to_string(scores[i]) + ", ";
    for (size_t j = 0; j < landmarks_per_face; ++j) {
      out = out + "(" +
            std::to_string(landmarks[i * landmarks_per_face + j][0]) + "," +
            std::to_string(landmarks[i * landmarks_per_face + j][1]);
      if (j < landmarks_per_face - 1) {
        out = out + "), ";
      } else {
        out = out + ")\n";
      }
    }
  }
  return out;
 }
 // Empties the mask container by swapping it with an empty temporary.
 void SegmentationResult::Clear() {
  decltype(masks)().swap(masks);
 }
--- a/fastdeploy/vision/common/result.h
+++ b/fastdeploy/vision/common/result.h
@@ -21,7 +21,8 @@ enum FASTDEPLOY_DECL ResultType {
  UNKNOWN_RESULT,
  CLASSIFY,
  DETECTION,
-  SEGMENTATION
+  SEGMENTATION,
  FACE_DETECTION
 };
 struct FASTDEPLOY_DECL BaseResult {
@@ -56,6 +57,31 @@ struct FASTDEPLOY_DECL DetectionResult : public BaseResult {
  std::string Str();
 };
 // Result of a face detector: boxes, scores and optional landmark points.
 struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult {
  // One entry per face: xmin, ymin, xmax, ymax.
  std::vector<std::array<float, 4>> boxes;
  // Landmark points as (x, y). May be empty if the model does not predict
  // landmarks. A face can own several points (e.g. 5/19/21/68/98), in which
  // case the points of all faces are stored back to back in this vector.
  std::vector<std::array<float, 2>> landmarks;
  // One confidence score per face.
  std::vector<float> scores;
  ResultType type = ResultType::FACE_DETECTION;
  // Number of landmark points per face. Set it manually in the post-process
  // step; it starts at 0.
  int landmarks_per_face;
  FaceDetectionResult() : landmarks_per_face(0) {}
  FaceDetectionResult(const FaceDetectionResult& res);
  // Empties all containers and resets landmarks_per_face.
  void Clear();
  // Reserves capacity for `size` faces.
  void Reserve(int size);
  // Resizes the containers to `size` faces.
  void Resize(int size);
  // Returns a printable description of the result.
  std::string Str();
 };
 struct FASTDEPLOY_DECL SegmentationResult : public BaseResult {
  // mask
  std::vector<std::vector<int64_t>> masks;
--- a/fastdeploy/vision/deepcam/init.py
+++ b/fastdeploy/vision/deepcam/init.py
@@ -0,0 +1,117 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 import logging
 from ... import FastDeployModel, Frontend
 from ... import fastdeploy_main as C
 class YOLOv5Face(FastDeployModel):
    """Python wrapper around the C++ ``vision.deepcam.YOLOv5Face`` binding.

    The properties below read and write the attributes of the same name on
    the underlying C++ model object.
    """
    def __init__(self,
                 model_file,
                 params_file="",
                 runtime_option=None,
                 model_format=Frontend.ONNX):
        # The base-class constructor initialises the backend option and
        # stores it in self._runtime_option.
        super(YOLOv5Face, self).__init__(runtime_option)
        self._model = C.vision.deepcam.YOLOv5Face(
            model_file, params_file, self._runtime_option, model_format)
        # self.initialized tells whether the whole model was set up correctly.
        assert self.initialized, "YOLOv5Face initialize failed."
    def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5):
        """Forward ``input_image`` and both thresholds to the C++ ``predict``
        and return its result."""
        return self._model.predict(input_image, conf_threshold,
                                   nms_iou_threshold)
    # Attribute wrappers for YOLOv5Face. Most of them control preprocessing,
    # e.g. model.size = [1280, 1280] changes the resize target (provided the
    # model supports it).
    @property
    def size(self):
        return self._model.size
    @property
    def padding_value(self):
        return self._model.padding_value
    @property
    def is_no_pad(self):
        return self._model.is_no_pad
    @property
    def is_mini_pad(self):
        return self._model.is_mini_pad
    @property
    def is_scale_up(self):
        return self._model.is_scale_up
    @property
    def stride(self):
        return self._model.stride
    @property
    def landmarks_per_face(self):
        return self._model.landmarks_per_face
    @size.setter
    def size(self, wh):
        # isinstance() needs a type or a tuple of types; a list of types
        # raises TypeError.
        assert isinstance(wh, (list, tuple)),\
            "The value to set `size` must be type of tuple or list."
        assert len(wh) == 2,\
            "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format(
            len(wh))
        self._model.size = wh
    @padding_value.setter
    def padding_value(self, value):
        assert isinstance(
            value,
            list), "The value to set `padding_value` must be type of list."
        self._model.padding_value = value
    @is_no_pad.setter
    def is_no_pad(self, value):
        assert isinstance(
            value, bool), "The value to set `is_no_pad` must be type of bool."
        self._model.is_no_pad = value
    @is_mini_pad.setter
    def is_mini_pad(self, value):
        assert isinstance(
            value,
            bool), "The value to set `is_mini_pad` must be type of bool."
        self._model.is_mini_pad = value
    @is_scale_up.setter
    def is_scale_up(self, value):
        assert isinstance(
            value,
            bool), "The value to set `is_scale_up` must be type of bool."
        self._model.is_scale_up = value
    @stride.setter
    def stride(self, value):
        assert isinstance(
            value, int), "The value to set `stride` must be type of int."
        self._model.stride = value
    @landmarks_per_face.setter
    def landmarks_per_face(self, value):
        assert isinstance(
            value,
            int), "The value to set `landmarks_per_face` must be type of int."
        self._model.landmarks_per_face = value
--- a/fastdeploy/vision/deepcam/deepcam_pybind.cc
+++ b/fastdeploy/vision/deepcam/deepcam_pybind.cc
@@ -0,0 +1,43 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/pybind/main.h"
 namespace fastdeploy {
 // Registers the `deepcam` submodule on `m` and exposes YOLOv5Face in it:
 // constructor, predict() and the public tuning attributes.
 void BindDeepCam(pybind11::module& m) {
  using Model = vision::deepcam::YOLOv5Face;
  auto deepcam_module =
      m.def_submodule("deepcam", "https://github.com/deepcam-cn/yolov5-face");
  pybind11::class_<Model, FastDeployModel> binder(deepcam_module,
                                                  "YOLOv5Face");
  binder.def(
      pybind11::init<std::string, std::string, RuntimeOption, Frontend>());
  // predict(): converts the numpy array to a cv::Mat, runs the model and
  // returns the filled FaceDetectionResult.
  binder.def("predict", [](Model& self, pybind11::array& data,
                           float conf_threshold, float nms_iou_threshold) {
    auto mat = PyArrayToCvMat(data);
    vision::FaceDetectionResult res;
    self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
    return res;
  });
  binder.def_readwrite("size", &Model::size);
  binder.def_readwrite("padding_value", &Model::padding_value);
  binder.def_readwrite("is_mini_pad", &Model::is_mini_pad);
  binder.def_readwrite("is_no_pad", &Model::is_no_pad);
  binder.def_readwrite("is_scale_up", &Model::is_scale_up);
  binder.def_readwrite("stride", &Model::stride);
  binder.def_readwrite("landmarks_per_face", &Model::landmarks_per_face);
 }
 }  // namespace fastdeploy
--- a/fastdeploy/vision/deepcam/yolov5face.cc
+++ b/fastdeploy/vision/deepcam/yolov5face.cc
@@ -0,0 +1,292 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/vision/deepcam/yolov5face.h"
 #include "fastdeploy/utils/perf.h"
 #include "fastdeploy/vision/utils/utils.h"
 namespace fastdeploy {
 namespace vision {
 namespace deepcam {
 // Resizes `mat` to fit inside size = {width, height} while keeping its aspect
 // ratio, then pads the remainder with `color`.
 //   _auto      : pad only by the remainder modulo `stride`
 //   scale_fill : (when _auto is false) stretch to exactly `size`, no padding
 //   scale_up   : when false the image is never enlarged (scale capped at 1)
 void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
               bool _auto, bool scale_fill = false, bool scale_up = true,
               int stride = 32) {
  const int target_w = size[0];
  const int target_h = size[1];
  float scale = std::min(target_h * 1.0 / mat->Height(),
                         target_w * 1.0 / mat->Width());
  if (!scale_up) {
    scale = std::min(scale, 1.0f);
  }

  int resize_h = static_cast<int>(round(mat->Height() * scale));
  int resize_w = static_cast<int>(round(mat->Width() * scale));
  int pad_w = target_w - resize_w;
  int pad_h = target_h - resize_h;
  if (_auto) {
    pad_h = pad_h % stride;
    pad_w = pad_w % stride;
  } else if (scale_fill) {
    pad_h = 0;
    pad_w = 0;
    resize_h = target_h;
    resize_w = target_w;
  }

  const bool needs_resize =
      resize_h != mat->Height() || resize_w != mat->Width();
  if (needs_resize) {
    Resize::Run(mat, resize_w, resize_h);
  }

  if (pad_h <= 0 && pad_w <= 0) {
    return;
  }
  // Split the padding between both sides; the -0.1 / +0.1 offsets send the
  // extra pixel of an odd padding to the bottom / right side.
  const float half_h = pad_h * 1.0 / 2;
  const float half_w = pad_w * 1.0 / 2;
  const int top = static_cast<int>(round(half_h - 0.1));
  const int bottom = static_cast<int>(round(half_h + 0.1));
  const int left = static_cast<int>(round(half_w - 0.1));
  const int right = static_cast<int>(round(half_w + 0.1));
  Pad::Run(mat, top, bottom, left, right, color);
 }
 // Selects the backends allowed for the given model format, copies the user
 // option into runtime_option, records the model paths and format, then runs
 // Initialize() and stores its result in `initialized`.
 YOLOv5Face::YOLOv5Face(const std::string& model_file,
                       const std::string& params_file,
                       const RuntimeOption& custom_option,
                       const Frontend& model_format) {
  const bool is_onnx = (model_format == Frontend::ONNX);
  if (!is_onnx) {
    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
  } else {
    // Backends usable on CPU / GPU for ONNX models.
    valid_cpu_backends = {Backend::ORT};
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
  }

  runtime_option = custom_option;
  runtime_option.model_file = model_file;
  runtime_option.params_file = params_file;
  runtime_option.model_format = model_format;

  initialized = Initialize();
 }
 // Sets the default pre/post-processing parameters, initialises the runtime
 // and detects whether the model input has dynamic spatial dimensions.
 // Returns false when the runtime cannot be initialised.
 bool YOLOv5Face::Initialize() {
  // Defaults for preprocessing.
  size = {640, 640};
  padding_value = {114.0, 114.0, 114.0};
  is_mini_pad = false;
  is_no_pad = false;
  is_scale_up = false;
  stride = 32;
  landmarks_per_face = 5;

  if (!InitRuntime()) {
    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
    return false;
  }

  // The runtime is ready, so the input shape can be inspected: every
  // dimension from index 2 on counts as dynamic when it is <= 0.
  is_dynamic_input_ = false;
  auto shape = InputInfoOfRuntime(0).shape;
  for (size_t dim = 2; dim < shape.size(); ++dim) {
    if (shape[dim] <= 0) {
      is_dynamic_input_ = true;
      break;
    }
  }
  // A static input needs a fixed shape after LetterBox padding, so mini-pad
  // is forced off in that case.
  if (!is_dynamic_input_) {
    is_mini_pad = false;
  }
  return true;
 }
 // Turns the input image into the network input tensor:
 //   aspect-preserving resize -> LetterBox -> BGR2RGB -> scale by 1/255
 //   -> HWC2CHW -> cast to float -> share with `output` and prepend a batch
 //   dimension.
 // The height/width of the image before the HWC2CHW step is stored in
 // (*im_info)["output_shape"]. Always returns true.
 bool YOLOv5Face::Preprocess(
    Mat* mat, FDTensor* output,
    std::map<std::string, std::array<float, 2>>* im_info) {
  // Resize so that the image fits into `size` while keeping its aspect ratio.
  const float src_h = static_cast<float>(mat->Height());
  const float src_w = static_cast<float>(mat->Width());
  const float ratio = std::min(size[1] * 1.0f / src_h, size[0] * 1.0f / src_w);
  if (ratio != 1.0) {  // skipped only when the image already fits exactly
    // Linear interpolation when enlarging, area interpolation when shrinking.
    const int interp = (ratio > 1.0) ? cv::INTER_LINEAR : cv::INTER_AREA;
    const int resize_h = int(round(src_h * ratio));
    const int resize_w = int(round(src_w * ratio));
    Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
  }

  LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up,
            stride);
  BGR2RGB::Run(mat);

  // Per-channel `result = mat * alpha + beta`, i.e. scale the pixels by 1/255.
  const std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f,
                                    1.0f / 255.0f};
  const std::vector<float> beta = {0.0f, 0.0f, 0.0f};
  Convert::Run(mat, alpha, beta);

  // Remember the preprocessed image shape for Postprocess.
  const float out_h = static_cast<float>(mat->Height());
  const float out_w = static_cast<float>(mat->Width());
  (*im_info)["output_shape"] = {out_h, out_w};

  HWC2CHW::Run(mat);
  Cast::Run(mat, "float");
  mat->ShareWithTensor(output);
  // Prepend a batch dimension of size 1.
  output->shape.insert(output->shape.begin(), 1);
  return true;
 }
 // Decodes the raw network output into `result`.
 //   infer_result      : float32 tensor of shape (1, n, k); each row holds
 //                       box(4) + objectness(1) + landmarks(2 * landmarks_per_face)
 //                       + class score(1, last column)
 //   im_info           : must contain "input_shape" and "output_shape"
 //   conf_threshold    : rows with objectness * class score <= threshold are
 //                       dropped
 //   nms_iou_threshold : IoU threshold passed to utils::NMS
 // Returns false when the tensor is not float32 or its shape cannot hold the
 // configured number of landmarks; boxes and landmarks are mapped back to the
 // original image and clipped to it.
 bool YOLOv5Face::Postprocess(
    FDTensor& infer_result, FaceDetectionResult* result,
    const std::map<std::string, std::array<float, 2>>& im_info,
    float conf_threshold, float nms_iou_threshold) {
  result->Clear();
  // landmarks_per_face must be set before Reserve so that landmark storage
  // is reserved too.
  result->landmarks_per_face = landmarks_per_face;

  if (infer_result.shape.size() != 3) {
    FDERROR << "The output tensor of YOLOv5Face must have 3 dimensions."
            << std::endl;
    return false;
  }
  FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now.");
  if (infer_result.dtype != FDDataType::FP32) {
    FDERROR << "Only support post process with float32 data." << std::endl;
    return false;
  }

  // A row needs 4 box values, 1 objectness value, 2 values per landmark and
  // 1 class score. Reject narrower rows instead of reading past them.
  const int num_landmark_values =
      landmarks_per_face > 0 ? landmarks_per_face * 2 : 0;
  const int min_row_width = 4 + 1 + num_landmark_values + 1;
  if (infer_result.shape[1] < 0 || infer_result.shape[2] < min_row_width) {
    FDERROR << "The shape of the output tensor does not match "
               "landmarks_per_face = "
            << landmarks_per_face << "." << std::endl;
    return false;
  }
  const size_t num_rows = static_cast<size_t>(infer_result.shape[1]);
  const size_t row_width = static_cast<size_t>(infer_result.shape[2]);
  // The class score is the last value of a row (index 15 for the default
  // 16-wide output with 5 landmarks).
  const size_t cls_index = row_width - 1;
  result->Reserve(static_cast<int>(num_rows));

  const float* data = static_cast<const float*>(infer_result.Data());
  for (size_t i = 0; i < num_rows; ++i) {
    const float* row = data + (i * row_width);
    const float confidence = row[4] * row[cls_index];
    // filter boxes by conf_threshold
    if (confidence <= conf_threshold) {
      continue;
    }
    const float x = row[0];
    const float y = row[1];
    const float w = row[2];
    const float h = row[3];
    // convert from [x, y, w, h] to [x1, y1, x2, y2]
    result->boxes.emplace_back(std::array<float, 4>{
        (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)});
    result->scores.push_back(confidence);
    // decode landmarks: (x, y) pairs stored right after the objectness value
    const float* landmarks_ptr = row + 5;
    for (int j = 0; j < num_landmark_values; j += 2) {
      result->landmarks.emplace_back(
          std::array<float, 2>{landmarks_ptr[j], landmarks_ptr[j + 1]});
    }
  }
  if (result->boxes.size() == 0) {
    return true;
  }
  utils::NMS(result, nms_iou_threshold);

  // Map the boxes back to the original image.
  auto iter_out = im_info.find("output_shape");
  auto iter_ipt = im_info.find("input_shape");
  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
           "Cannot find input_shape or output_shape from im_info.");
  const float out_h = iter_out->second[0];
  const float out_w = iter_out->second[1];
  const float ipt_h = iter_ipt->second[0];
  const float ipt_w = iter_ipt->second[1];
  const float scale = std::min(out_h / ipt_h, out_w / ipt_w);
  float pad_h = (out_h - ipt_h * scale) / 2.f;
  float pad_w = (out_w - ipt_w * scale) / 2.f;
  if (is_mini_pad) {
    pad_h = static_cast<float>(static_cast<int>(pad_h) % stride);
    pad_w = static_cast<float>(static_cast<int>(pad_w) % stride);
  }
  // Removes the padding, undoes the scaling and clips to [0, limit - 1].
  auto restore = [scale](float value, float pad, float limit) {
    const float restored = std::max((value - pad) / scale, 0.0f);
    return std::min(restored, limit - 1.0f);
  };
  // scale and clip box
  for (size_t i = 0; i < result->boxes.size(); ++i) {
    std::array<float, 4>& box = result->boxes[i];
    box[0] = restore(box[0], pad_w, ipt_w);
    box[1] = restore(box[1], pad_h, ipt_h);
    box[2] = restore(box[2], pad_w, ipt_w);
    box[3] = restore(box[3], pad_h, ipt_h);
  }
  // scale and clip landmarks
  for (size_t i = 0; i < result->landmarks.size(); ++i) {
    std::array<float, 2>& point = result->landmarks[i];
    point[0] = restore(point[0], pad_w, ipt_w);
    point[1] = restore(point[1], pad_h, ipt_h);
  }
  return true;
 }
 // Runs the full pipeline (preprocess -> inference -> postprocess) on `im` and
 // writes the detections to `result`.
 // Returns false when the image is null/empty or when any stage fails.
 bool YOLOv5Face::Predict(cv::Mat* im, FaceDetectionResult* result,
                         float conf_threshold, float nms_iou_threshold) {
  // An empty image has zero height/width, which would make the resize ratio
  // in Preprocess divide by zero, so reject it up front.
  if (im == nullptr || im->empty()) {
    FDERROR << "The input image is null or empty." << std::endl;
    return false;
  }
 #ifdef FASTDEPLOY_DEBUG
  TIMERECORD_START(0)
 #endif
  Mat mat(*im);
  std::vector<FDTensor> input_tensors(1);
  std::map<std::string, std::array<float, 2>> im_info;
  // Record the shape of the image; "output_shape" starts equal to the input
  // shape and is overwritten by Preprocess.
  im_info["input_shape"] = {static_cast<float>(mat.Height()),
                            static_cast<float>(mat.Width())};
  im_info["output_shape"] = {static_cast<float>(mat.Height()),
                             static_cast<float>(mat.Width())};
  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
    FDERROR << "Failed to preprocess input image." << std::endl;
    return false;
  }
 #ifdef FASTDEPLOY_DEBUG
  TIMERECORD_END(0, "Preprocess")
  TIMERECORD_START(1)
 #endif
  input_tensors[0].name = InputInfoOfRuntime(0).name;
  std::vector<FDTensor> output_tensors;
  if (!Infer(input_tensors, &output_tensors)) {
    FDERROR << "Failed to inference." << std::endl;
    return false;
  }
 #ifdef FASTDEPLOY_DEBUG
  TIMERECORD_END(1, "Inference")
  TIMERECORD_START(2)
 #endif
  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
                   nms_iou_threshold)) {
    FDERROR << "Failed to post process." << std::endl;
    return false;
  }
 #ifdef FASTDEPLOY_DEBUG
  TIMERECORD_END(2, "Postprocess")
 #endif
  return true;
 }
 }  // namespace deepcam
 }  // namespace vision
 }  // namespace fastdeploy
--- a/fastdeploy/vision/deepcam/yolov5face.h
+++ b/fastdeploy/vision/deepcam/yolov5face.h
@@ -0,0 +1,97 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include "fastdeploy/fastdeploy_model.h"
 #include "fastdeploy/vision/common/processors/transform.h"
 #include "fastdeploy/vision/common/result.h"
 namespace fastdeploy {
 namespace vision {
 namespace deepcam {
 // YOLOv5Face face detector (https://github.com/deepcam-cn/yolov5-face).
 class FASTDEPLOY_DECL YOLOv5Face : public FastDeployModel {
 public:
  // When model_format is ONNX, params_file does not need to be given.
  // When model_format is Paddle, both model_file and params_file are required.
  YOLOv5Face(const std::string& model_file, const std::string& params_file = "",
             const RuntimeOption& custom_option = RuntimeOption(),
             const Frontend& model_format = Frontend::ONNX);
  // Name of the model.
  std::string ModelName() const { return "deepcam-cn/yolov5-face"; }
  // Prediction entry point called by users.
  // im                is the input image; for CV models this is a cv::Mat
  // result            receives the prediction
  // conf_threshold    post-processing parameter (confidence filter)
  // nms_iou_threshold post-processing parameter (IoU threshold for NMS)
  virtual bool Predict(cv::Mat* im, FaceDetectionResult* result,
                       float conf_threshold = 0.25,
                       float nms_iou_threshold = 0.5);
  // The members below are parameters used during prediction, mostly by the
  // pre- and post-processing. After creating the model they can be changed
  // to match the model's requirements and the user's needs.
  // target size as (width, height)
  std::vector<int> size;
  // padding value, size should be the same as the number of channels
  std::vector<float> padding_value;
  // only pad to the minimum rectangle whose height and width are multiples
  // of stride
  bool is_mini_pad;
  // when is_mini_pad = false and is_no_pad = true, the image is resized to
  // the set size
  bool is_no_pad;
  // if is_scale_up is false, the input image can only be zoomed out; the
  // maximum resize scale cannot exceed 1.0
  bool is_scale_up;
  // padding stride, for is_mini_pad
  int stride;
  // number of landmarks per face (if any), default 5 in the official
  // yolov5face. Note that the output tensor's shape must be:
  // (1,n,4+1+2*landmarks_per_face+1=box+obj+landmarks+cls)
  int landmarks_per_face;
 private:
  // Initialisation: sets up the backend and everything else inference needs.
  bool Initialize();
  // Preprocesses the input image.
  // Mat is the image data structure defined by FastDeploy
  // FDTensor receives the preprocessed tensor that is passed to the backend
  // im_info stores data recorded during preprocessing for use in
  // post-processing
  bool Preprocess(Mat* mat, FDTensor* outputs,
                  std::map<std::string, std::array<float, 2>>* im_info);
  // Post-processes the backend output into the result returned to the user.
  // infer_result      output tensor of the backend
  // result            prediction result of the model
  // im_info           information recorded by preprocessing, used to map
  //                   boxes back to the original image
  // conf_threshold    confidence threshold used to filter boxes
  // nms_iou_threshold IoU threshold used by NMS
  bool Postprocess(FDTensor& infer_result, FaceDetectionResult* result,
                   const std::map<std::string, std::array<float, 2>>& im_info,
                   float conf_threshold, float nms_iou_threshold);
  // Whether the model input has dynamic dimensions. Not recommended for
  // direct use; the logic may differ between models.
  bool IsDynamicInput() const { return is_dynamic_input_; }
  bool is_dynamic_input_;
 };
 }  // namespace deepcam
 }  // namespace vision
 }  // namespace fastdeploy
--- a/fastdeploy/vision/utils/nms.cc
+++ b/fastdeploy/vision/utils/nms.cc
@@ -66,6 +66,62 @@ void NMS(DetectionResult* result, float iou_threshold) {
  }
 }
 // Non-maximum suppression for face detections.
 // Sorts `result` by descending score, then drops every box whose IoU with an
 // earlier kept box is greater than iou_threshold. Scores and landmarks of the
 // kept faces are retained; landmarks_per_face is preserved.
 void NMS(FaceDetectionResult* result, float iou_threshold) {
  utils::SortDetectionResult(result);

  const size_t num_boxes = result->boxes.size();
  std::vector<float> areas(num_boxes);
  std::vector<int> suppressed(num_boxes, 0);
  for (size_t idx = 0; idx < num_boxes; ++idx) {
    const auto& box = result->boxes[idx];
    areas[idx] = (box[2] - box[0]) * (box[3] - box[1]);
  }

  for (size_t keep = 0; keep < num_boxes; ++keep) {
    if (suppressed[keep] == 1) {
      continue;
    }
    const auto& kept_box = result->boxes[keep];
    for (size_t cand = keep + 1; cand < num_boxes; ++cand) {
      if (suppressed[cand] == 1) {
        continue;
      }
      const auto& cand_box = result->boxes[cand];
      // Intersection rectangle; its extent is clamped to zero when the boxes
      // do not overlap.
      const float xmin = std::max(kept_box[0], cand_box[0]);
      const float ymin = std::max(kept_box[1], cand_box[1]);
      const float xmax = std::min(kept_box[2], cand_box[2]);
      const float ymax = std::min(kept_box[3], cand_box[3]);
      const float overlap_w = std::max(0.0f, xmax - xmin);
      const float overlap_h = std::max(0.0f, ymax - ymin);
      const float overlap_area = overlap_w * overlap_h;
      const float iou =
          overlap_area / (areas[keep] + areas[cand] - overlap_area);
      if (iou > iou_threshold) {
        suppressed[cand] = 1;
      }
    }
  }

  // Rebuild the result from a copy, keeping only the surviving faces.
  FaceDetectionResult backup(*result);
  const int points_per_face = result->landmarks_per_face;
  result->Clear();
  // Clear() resets landmarks_per_face, so restore it before Reserve.
  result->landmarks_per_face = points_per_face;
  result->Reserve(suppressed.size());
  for (size_t idx = 0; idx < suppressed.size(); ++idx) {
    if (suppressed[idx] == 1) {
      continue;
    }
    result->boxes.emplace_back(backup.boxes[idx]);
    result->scores.push_back(backup.scores[idx]);
    // Landmarks (if any); the loop is empty when points_per_face <= 0.
    for (int k = 0; k < points_per_face; ++k) {
      result->landmarks.emplace_back(
          backup.landmarks[idx * points_per_face + k]);
    }
  }
 }
 }  // namespace utils
 }  // namespace vision
 }  // namespace fastdeploy
--- a/fastdeploy/vision/utils/sort_face_det_res.cc
+++ b/fastdeploy/vision/utils/sort_face_det_res.cc
@@ -0,0 +1,69 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/vision/utils/utils.h"
 namespace fastdeploy {
 namespace vision {
 namespace utils {
 // Reorders a face detection result by descending score. Boxes, scores and
 // (when landmarks_per_face > 0) the landmarks of each face move together.
 // Asserts that the landmark count matches boxes.size() * landmarks_per_face.
 void SortDetectionResult(FaceDetectionResult* result) {
  if (result->boxes.size() == 0) {
    return;
  }
  const int landmarks_per_face = result->landmarks_per_face;
  if (landmarks_per_face > 0) {
    FDASSERT(
        (result->landmarks.size() == result->boxes.size() * landmarks_per_face),
        "The size of landmarks != boxes.size * landmarks_per_face.");
  }

  // Argsort: `order` lists the face indices from highest to lowest score.
  std::vector<size_t> order(result->boxes.size());
  for (size_t pos = 0; pos < order.size(); ++pos) {
    order[pos] = pos;
  }
  std::vector<float>& scores = result->scores;
  std::sort(order.begin(), order.end(),
            [&scores](size_t a, size_t b) { return scores[a] > scores[b]; });

  // Rebuild the result from a copy in the sorted order.
  FaceDetectionResult backup(*result);
  result->Clear();
  // Clear() resets landmarks_per_face, so restore it before Reserve.
  result->landmarks_per_face = landmarks_per_face;
  result->Reserve(order.size());
  for (size_t pos = 0; pos < order.size(); ++pos) {
    const size_t src = order[pos];
    result->boxes.emplace_back(backup.boxes[src]);
    result->scores.push_back(backup.scores[src]);
    // The loop is empty when there are no landmarks.
    for (int k = 0; k < landmarks_per_face; ++k) {
      result->landmarks.emplace_back(
          backup.landmarks[src * landmarks_per_face + k]);
    }
  }
 }
 }  // namespace utils
 }  // namespace vision
 }  // namespace fastdeploy
--- a/fastdeploy/vision/utils/utils.h
+++ b/fastdeploy/vision/utils/utils.h
@@ -53,9 +53,13 @@ std::vector<int32_t> TopKIndices(const T* array, int array_size, int topk) {
 // NMS over the detections in `output`; `iou_threshold` defaults to 0.5.
 // NOTE(review): the implementation is not visible from this header --
 // presumably non-maximum suppression on box IoU; confirm in the .cc file.
 void NMS(DetectionResult* output, float iou_threshold = 0.5);
 // Overload of NMS for face detection results (same default threshold).
 void NMS(FaceDetectionResult* result, float iou_threshold = 0.5);
 // MergeSort
 // NOTE(review): the "MergeSort" label presumably refers to the
 // DetectionResult overload; confirm against its implementation.
 void SortDetectionResult(DetectionResult* output);
 // Reorders `result` in place by descending score; the implementation uses
 // std::sort on an index array and moves each face's landmarks with its box.
 void SortDetectionResult(FaceDetectionResult* result);
 }  // namespace utils
 }  // namespace vision
 }  // namespace fastdeploy
--- a/fastdeploy/vision/vision_pybind.cc
+++ b/fastdeploy/vision/vision_pybind.cc
@@ -23,6 +23,7 @@ void BindPPSeg(pybind11::module& m);
 void BindUltralytics(pybind11::module& m);
 void BindMeituan(pybind11::module& m);
 void BindMegvii(pybind11::module& m);
 void BindDeepCam(pybind11::module& m);
 void BindRangiLyu(pybind11::module& m);
 #ifdef ENABLE_VISION_VISUALIZE
 void BindVisualize(pybind11::module& m);
@@ -44,6 +45,15 @@ void BindVision(pybind11::module& m) {
      .def("__repr__", &vision::DetectionResult::Str)
      .def("__str__", &vision::DetectionResult::Str);
  // Expose vision::FaceDetectionResult to Python as "FaceDetectionResult":
  // default-constructible, with read/write access to boxes, scores, landmarks
  // and landmarks_per_face; Str() backs both __repr__ and __str__.
  pybind11::class_<vision::FaceDetectionResult>(m, "FaceDetectionResult")
      .def(pybind11::init())
      .def_readwrite("boxes", &vision::FaceDetectionResult::boxes)
      .def_readwrite("scores", &vision::FaceDetectionResult::scores)
      .def_readwrite("landmarks", &vision::FaceDetectionResult::landmarks)
      .def_readwrite("landmarks_per_face",
                     &vision::FaceDetectionResult::landmarks_per_face)
      .def("__repr__", &vision::FaceDetectionResult::Str)
      .def("__str__", &vision::FaceDetectionResult::Str);
  pybind11::class_<vision::SegmentationResult>(m, "SegmentationResult")
      .def(pybind11::init())
      .def_readwrite("masks", &vision::SegmentationResult::masks)
@@ -57,6 +67,7 @@ void BindVision(pybind11::module& m) {
  BindWongkinyiu(m);
  BindMeituan(m);
  BindMegvii(m);
  BindDeepCam(m);
  BindRangiLyu(m);
 #ifdef ENABLE_VISION_VISUALIZE
  BindVisualize(m);
--- a/fastdeploy/vision/visualize/init.py
+++ b/fastdeploy/vision/visualize/init.py
@@ -21,6 +21,11 @@ def vis_detection(im_data, det_result, line_size=1, font_size=0.5):
    C.vision.Visualize.vis_detection(im_data, det_result, line_size, font_size)
 def vis_face_detection(im_data, face_det_result, line_size=1, font_size=0.5):
    """Forward to the C++ binding ``C.vision.Visualize.vis_face_detection``.

    All four arguments are passed through unchanged and nothing is returned.
    NOTE(review): callers appear to rely on ``im_data`` being drawn on in
    place -- confirm against the binding.
    """
    C.vision.Visualize.vis_face_detection(im_data, face_det_result, line_size,
                                          font_size)
 def vis_segmentation(im_data, seg_result, vis_im_data, num_classes=1000):
    """Forward to the C++ binding ``C.vision.Visualize.vis_segmentation``.

    All four arguments are passed through unchanged and nothing is returned.
    """
    C.vision.Visualize.vis_segmentation(im_data, seg_result, vis_im_data,
                                        num_classes)
--- a/fastdeploy/vision/visualize/face_detection.cc
+++ b/fastdeploy/vision/visualize/face_detection.cc
@@ -0,0 +1,81 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "opencv2/imgproc/imgproc.hpp"
 namespace fastdeploy {
 namespace vision {
 // Draws every face of `result` onto `*im`: the bounding box, the score text
 // (truncated to at most 4 characters) anchored at the box's top-left corner
 // and, when landmarks are usable, one filled circle per landmark.
 //
 //   im        : image that is drawn on in place.
 //   result    : each box is read as [xmin, ymin, xmax, ymax]. Landmarks are
 //               drawn only when landmarks_per_face > 0 and
 //               landmarks.size() == boxes.size() * landmarks_per_face.
 //   line_size : thickness of the box outline and radius of landmark circles.
 //   font_size : font scale passed to cv::putText.
 //
 // Colors are taken from GetColorMap() and repeat every 333 faces.
 void Visualize::VisFaceDetection(cv::Mat* im, const FaceDetectionResult& result,
                                 int line_size, float font_size) {
  // Bind by reference: GetColorMap() returns a const reference, so copying the
  // whole table on every call is unnecessary.
  const auto& color_map = GetColorMap();
  // Number of landmarks to draw per face; stays 0 when the result carries no
  // landmarks or their count is inconsistent with the number of boxes.
  size_t landmarks_per_face = 0;
  if (result.landmarks_per_face > 0) {
    const size_t declared = static_cast<size_t>(result.landmarks_per_face);
    if (result.boxes.size() * declared == result.landmarks.size()) {
      landmarks_per_face = declared;
    }
  }
  const int font = cv::FONT_HERSHEY_SIMPLEX;
  const cv::Scalar text_color(255, 255, 255);
  for (size_t i = 0; i < result.boxes.size(); ++i) {
    const auto& box = result.boxes[i];
    // cv::Rect takes (x, y, width, height); the box stores two corners.
    cv::Rect rect(box[0], box[1], box[2] - box[0], box[3] - box[1]);
    const int color_id = static_cast<int>(i % 333);
    const cv::Scalar rect_color(color_map[3 * color_id + 0],
                                color_map[3 * color_id + 1],
                                color_map[3 * color_id + 2]);
    std::string text = std::to_string(result.scores[i]);
    if (text.size() > 4) {
      text = text.substr(0, 4);
    }
    const cv::Point origin(rect.x, rect.y);
    cv::rectangle(*im, rect, rect_color, line_size);
    cv::putText(*im, text, origin, font, font_size, text_color, 1);
    // Landmarks share the color of their box; thickness -1 fills the circle.
    for (size_t j = 0; j < landmarks_per_face; ++j) {
      const auto& point = result.landmarks[i * landmarks_per_face + j];
      const cv::Point landmark(static_cast<int>(point[0]),
                               static_cast<int>(point[1]));
      cv::circle(*im, landmark, line_size, rect_color, -1);
    }
  }
 }
 }  // namespace vision
 }  // namespace fastdeploy
 #endif
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -27,6 +27,8 @@ class FASTDEPLOY_DECL Visualize {
  static const std::vector<int>& GetColorMap(int num_classes = 1000);
  static void VisDetection(cv::Mat* im, const DetectionResult& result,
                           int line_size = 2, float font_size = 0.5f);
  // Draws the boxes, score texts and (when present and consistent with the
  // number of boxes) the landmarks of `result` onto `*im`.
  static void VisFaceDetection(cv::Mat* im, const FaceDetectionResult& result,
                               int line_size = 2, float font_size = 0.5f);
  static void VisSegmentation(const cv::Mat& im,
                              const SegmentationResult& result,
                              cv::Mat* vis_img, const int& num_classes = 1000);
--- a/fastdeploy/vision/visualize/visualize_pybind.cc
+++ b/fastdeploy/vision/visualize/visualize_pybind.cc
@@ -25,6 +25,14 @@ void BindVisualize(pybind11::module& m) {
                    vision::Visualize::VisDetection(&im, result, line_size,
                                                    font_size);
                  })
      .def_static(
          "vis_face_detection",
          [](pybind11::array& im_data, vision::FaceDetectionResult& result,
             int line_size, float font_size) {
            auto im = PyArrayToCvMat(im_data);
            vision::Visualize::VisFaceDetection(&im, result, line_size,
                                                font_size);
          })
      .def_static("vis_segmentation", [](pybind11::array& im_data,
                                         vision::SegmentationResult& result,
                                         pybind11::array& vis_im_data,
--- a/fastdeploy/vision/wongkinyiu/scaledyolov4.cc
+++ b/fastdeploy/vision/wongkinyiu/scaledyolov4.cc
@@ -57,8 +57,10 @@ void ScaledYOLOv4::LetterBox(Mat* mat, const std::vector<int>& size,
  }
 }
-ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file, const std::string& params_file,
+ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file,
-             const RuntimeOption& custom_option, const Frontend& model_format) {
+                           const std::string& params_file,
                           const RuntimeOption& custom_option,
                           const Frontend& model_format) {
  if (model_format == Frontend::ONNX) {
    valid_cpu_backends = {Backend::ORT};  // 指定可用的CPU后端
    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // 指定可用的GPU后端
@@ -90,7 +92,8 @@ bool ScaledYOLOv4::Initialize() {
  return true;
 }
-bool ScaledYOLOv4::Preprocess(Mat* mat, FDTensor* output,
+bool ScaledYOLOv4::Preprocess(
    Mat* mat, FDTensor* output,
    std::map<std::string, std::array<float, 2>>* im_info) {
  // process after image load
  float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
@@ -199,8 +202,8 @@ bool ScaledYOLOv4::Postprocess(
  return true;
 }
-bool ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
+bool ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result,
-                    float nms_iou_threshold) {
+                           float conf_threshold, float nms_iou_threshold) {
 #ifdef FASTDEPLOY_DEBUG
  TIMERECORD_START(0)
 #endif
--- a/fastdeploy/vision/wongkinyiu/scaledyolov4.h
+++ b/fastdeploy/vision/wongkinyiu/scaledyolov4.h
@@ -25,7 +25,8 @@ class FASTDEPLOY_DECL ScaledYOLOv4 : public FastDeployModel {
 public:
  // 当model_format为ONNX时，无需指定params_file
  // 当model_format为Paddle时，则需同时指定model_file & params_file
-  ScaledYOLOv4(const std::string& model_file, const std::string& params_file = "",
+  ScaledYOLOv4(const std::string& model_file,
               const std::string& params_file = "",
               const RuntimeOption& custom_option = RuntimeOption(),
               const Frontend& model_format = Frontend::ONNX);
--- a/model_zoo/vision/yolov5face/README.md
+++ b/model_zoo/vision/yolov5face/README.md
@@ -0,0 +1,78 @@
 # YOLOv5Face部署示例
 当前支持模型版本为：[YOLOv5Face CommitID:4fd1ead](https://github.com/deepcam-cn/yolov5-face/commit/4fd1ead)
 本文档说明如何进行[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)的快速部署推理。本目录结构如下
 ```
 .
 ├── cpp                     # C++ 代码目录
 │   ├── CMakeLists.txt      # C++ 代码编译CMakeLists文件
 │   ├── README.md           # C++ 代码编译部署文档
 │   └── yolov5face.cc       # C++ 示例代码
 ├── api.md                  # API 说明文档
 ├── README.md               # YOLOv5Face 部署文档
 └── yolov5face.py           # Python示例代码
 ```
 ## 获取ONNX文件
 访问[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)官方github库，按照指引下载安装，下载`yolov5s-face.pt` 模型，利用 `export.py` 得到`onnx`格式文件。
 * 下载yolov5face模型文件
  ```
  Link: https://pan.baidu.com/s/1fyzLxZYx7Ja1_PCIWRhxbw 提取码: eq0q  
  https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing
  ```
 * 导出onnx格式文件
  ```bash
  PYTHONPATH=. python export.py --weights weights/yolov5s-face.pt --img_size 640 640 --batch_size 1  
  ```
 * onnx模型简化(可选)
  ```bash
  onnxsim yolov5s-face.onnx yolov5s-face.onnx
  ```
 * 移动onnx文件到model_zoo/vision/yolov5face的目录
  ```bash
  cp PATH/TO/yolov5s-face.onnx PATH/TO/model_zoo/vision/yolov5face/
  ```
 ## 准备测试图片
 准备一张包含人脸的测试图片，命名为test.jpg，并拷贝到`yolov5face.py`所在的目录
 ## 安装FastDeploy
 使用如下命令安装FastDeploy，注意到此处安装的是`vision-cpu`，也可根据需求安装`vision-gpu`
 ```bash
 # 安装fastdeploy-python工具
 pip install fastdeploy-python
 # 安装vision-cpu模块
 fastdeploy install vision-cpu
 ```
 ## Python部署
 按上文准备好`yolov5s-face.onnx`模型和测试图片`test.jpg`后，执行如下代码进行预测
 ```bash
 python yolov5face.py
 ```
 执行完成后会将可视化结果保存在本地`vis_result.jpg`，同时输出检测结果如下
 ```
 FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5]
 749.575256,375.122162, 775.008850, 407.858215, 0.851824, (756.933838,388.423157), (767.810974,387.932922), (762.617065,394.212341), (758.053101,399.073639), (767.370300,398.769470)
 897.833862,380.372864, 924.725281, 409.566803, 0.847505, (903.757202,390.221741), (914.575867,389.495911), (908.998901,395.983307), (905.803223,400.871429), (914.674438,400.268066)
 281.558197,367.739349, 305.474701, 397.860535, 0.840915, (287.018768,379.771088), (297.285004,378.755280), (292.057831,385.207367), (289.110962,390.010437), (297.535339,389.412048)
 132.922104,368.507263, 159.098541, 402.777283, 0.840232, (140.632492,382.361633), (151.900864,380.966156), (146.869186,388.505066), (141.930420,393.724670), (151.734604,392.808197)
 699.379700,306.743256, 723.219421, 336.533295, 0.840228, (705.688843,319.133301), (715.784668,318.449524), (711.107300,324.416016), (707.236633,328.671936), (716.088623,328.151794)
 # ...
 ```
 ## 其它文档
 - [C++部署](./cpp/README.md)
 - [YOLOv5Face API文档](./api.md)
--- a/model_zoo/vision/yolov5face/api.md
+++ b/model_zoo/vision/yolov5face/api.md
@@ -0,0 +1,71 @@
 # YOLOv5Face API说明
 ## Python API
 ### YOLOv5Face类
 ```
 fastdeploy.vision.deepcam.YOLOv5Face(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX)
 ```
 YOLOv5Face模型加载和初始化，当model_format为`fd.Frontend.ONNX`时，只需提供model_file，如`yolov5s-face.onnx`；当model_format为`fd.Frontend.PADDLE`时，则需同时提供model_file和params_file。
 **参数**
 > * **model_file**(str): 模型文件路径
 > * **params_file**(str): 参数文件路径
 > * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
 > * **model_format**(Frontend): 模型格式
 #### predict函数
 > ```
 > YOLOv5Face.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5)
 > ```
 > 模型预测接口，输入图像直接输出检测结果。
 >
 > **参数**
 >
 > > * **image_data**(np.ndarray): 输入数据，注意需为HWC，BGR格式
 > > * **conf_threshold**(float): 检测框置信度过滤阈值
 > > * **nms_iou_threshold**(float): NMS处理过程中iou阈值
 示例代码参考[yolov5face.py](./yolov5face.py)
 ## C++ API
 ### YOLOv5Face类
 ```
 fastdeploy::vision::deepcam::YOLOv5Face(
        const string& model_file,
        const string& params_file = "",
        const RuntimeOption& runtime_option = RuntimeOption(),
        const Frontend& model_format = Frontend::ONNX)
 ```
 YOLOv5Face模型加载和初始化，当model_format为`Frontend::ONNX`时，只需提供model_file，如`yolov5s-face.onnx`；当model_format为`Frontend::PADDLE`时，则需同时提供model_file和params_file。
 **参数**
 > * **model_file**(str): 模型文件路径
 > * **params_file**(str): 参数文件路径
 > * **runtime_option**(RuntimeOption): 后端推理配置，默认为`RuntimeOption()`，即采用默认配置
 > * **model_format**(Frontend): 模型格式
 #### Predict函数
 > ```
 > YOLOv5Face::Predict(cv::Mat* im, FaceDetectionResult* result,
 >                     float conf_threshold = 0.25,
 >                     float nms_iou_threshold = 0.5)
 > ```
 > 模型预测接口，输入图像直接输出检测结果。
 >
 > **参数**
 >
 > > * **im**: 输入图像，注意需为HWC，BGR格式
 > > * **result**: 检测结果，包括检测框、各个框的置信度以及每个人脸的关键点
 > > * **conf_threshold**: 检测框置信度过滤阈值
 > > * **nms_iou_threshold**: NMS处理过程中iou阈值
 示例代码参考[cpp/yolov5face.cc](cpp/yolov5face.cc)
 ## 其它API使用
 - [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md)
--- a/model_zoo/vision/yolov5face/cpp/CMakeLists.txt
+++ b/model_zoo/vision/yolov5face/cpp/CMakeLists.txt
@@ -0,0 +1,17 @@
 # cmake_minimum_required() must be called before project() so that the
 # version and policy settings are in effect when the project is set up.
 CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
 PROJECT(yolov5face_demo C CXX)
 # On toolchains that use the old (pre-C++11) libstdc++ ABI, enable the
 # following definition to build in compatibility mode
 # add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
 # Path of the downloaded and extracted FastDeploy library; the directory name
 # matches the fastdeploy-linux-x64-0.0.3.tgz package fetched in README.md
 set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 # Add the FastDeploy header search paths
 include_directories(${FASTDEPLOY_INCS})
 add_executable(yolov5face_demo ${PROJECT_SOURCE_DIR}/yolov5face.cc)
 # Link against the FastDeploy libraries
 target_link_libraries(yolov5face_demo ${FASTDEPLOY_LIBS})
--- a/model_zoo/vision/yolov5face/cpp/README.md
+++ b/model_zoo/vision/yolov5face/cpp/README.md
@@ -0,0 +1,60 @@
 # 编译YOLOv5Face示例
 当前支持模型版本为：[YOLOv5Face CommitID:4fd1ead](https://github.com/deepcam-cn/yolov5-face/commit/4fd1ead)
 ## 下载和解压预测库
 ```bash
 wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
 tar xvf fastdeploy-linux-x64-0.0.3.tgz
 ```
 ## 编译示例代码
 ```bash
 mkdir build && cd build
 cmake ..
 make -j
 ```
 ## 获取ONNX文件
 访问[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)官方github库，按照指引下载安装，下载`yolov5s-face.pt` 模型，利用 `export.py` 得到`onnx`格式文件。
 * 下载yolov5face模型文件
  ```
  Link: https://pan.baidu.com/s/1fyzLxZYx7Ja1_PCIWRhxbw 提取码: eq0q  
  https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing
  ```
 * 导出onnx格式文件
  ```bash
  PYTHONPATH=. python export.py --weights weights/yolov5s-face.pt --img_size 640 640 --batch_size 1  
  ```
 * onnx模型简化(可选)
  ```bash
  onnxsim yolov5s-face.onnx yolov5s-face.onnx
  ```
 * 移动onnx文件到可执行文件的目录
  ```bash
  cp PATH/TO/yolov5s-face.onnx PATH/TO/model_zoo/vision/yolov5face/cpp/build
  ```
 ## 准备测试图片
 准备一张包含人脸的测试图片，命名为test.jpg，并拷贝到可执行文件所在的目录
 ## 执行
 ```bash
 ./yolov5face_demo
 ```
 执行完后可视化的结果保存在本地`vis_result.jpg`，同时会将检测框输出在终端，如下所示
 ```
 FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5]
 749.575256,375.122162, 775.008850, 407.858215, 0.851824, (756.933838,388.423157), (767.810974,387.932922), (762.617065,394.212341), (758.053101,399.073639), (767.370300,398.769470)
 897.833862,380.372864, 924.725281, 409.566803, 0.847505, (903.757202,390.221741), (914.575867,389.495911), (908.998901,395.983307), (905.803223,400.871429), (914.674438,400.268066)
 281.558197,367.739349, 305.474701, 397.860535, 0.840915, (287.018768,379.771088), (297.285004,378.755280), (292.057831,385.207367), (289.110962,390.010437), (297.535339,389.412048)
 132.922104,368.507263, 159.098541, 402.777283, 0.840232, (140.632492,382.361633), (151.900864,380.966156), (146.869186,388.505066), (141.930420,393.724670), (151.734604,392.808197)
 699.379700,306.743256, 723.219421, 336.533295, 0.840228, (705.688843,319.133301), (715.784668,318.449524), (711.107300,324.416016), (707.236633,328.671936), (716.088623,328.151794)
 # ...
 ```
--- a/model_zoo/vision/yolov5face/cpp/yolov5face.cc
+++ b/model_zoo/vision/yolov5face/cpp/yolov5face.cc
@@ -0,0 +1,40 @@
 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/vision.h"
 // Demo entry point: loads yolov5s-face.onnx, runs face detection on test.jpg,
 // prints the detections and writes the visualization to vis_result.jpg.
 // Returns 0 on success and -1 when model initialization, image loading,
 // prediction or writing the output image fails.
 int main() {
  namespace vis = fastdeploy::vision;
  auto model = vis::deepcam::YOLOv5Face("yolov5s-face.onnx");
  if (!model.Initialized()) {
    std::cerr << "Init Failed." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("test.jpg");
  // cv::imread returns an empty matrix when the file is missing or cannot be
  // decoded; stop here instead of handing an empty image to the model.
  if (im.empty()) {
    std::cerr << "Failed to read image: test.jpg" << std::endl;
    return -1;
  }
  // Predict receives `im` by pointer, so the drawing is done on a copy.
  cv::Mat vis_im = im.clone();
  vis::FaceDetectionResult res;
  if (!model.Predict(&im, &res, 0.1f, 0.3f)) {
    std::cerr << "Prediction Failed." << std::endl;
    return -1;
  }
  // Print the predicted boxes
  std::cout << res.Str() << std::endl;
  // Visualize the prediction result
  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
  if (!cv::imwrite("vis_result.jpg", vis_im)) {
    std::cerr << "Failed to write image: vis_result.jpg" << std::endl;
    return -1;
  }
  return 0;
 }
--- a/model_zoo/vision/yolov5face/yolov5face.py
+++ b/model_zoo/vision/yolov5face/yolov5face.py
@@ -0,0 +1,17 @@
 import fastdeploy as fd
 import cv2
 # Load the model
 model = fd.vision.deepcam.YOLOv5Face("yolov5s-face.onnx")
 # Read the test image; cv2.imread returns None (it does not raise) when the
 # file is missing or cannot be decoded, so fail early with a clear message
 im = cv2.imread("test.jpg")
 if im is None:
     raise FileNotFoundError("Failed to read image: test.jpg")
 # Run prediction on the image
 result = model.predict(im, conf_threshold=0.1, nms_iou_threshold=0.3)
 # Visualize the result and save it
 fd.vision.visualize.vis_face_detection(im, result)
 cv2.imwrite("vis_result.jpg", im)
 # Print the prediction result
 print(result)
 print(model.runtime_option)